Pgindent run for 8.0.
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.93 2004/08/29 04:12:17 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.94 2004/08/29 05:06:39 momjian Exp $
  *
  * NOTES
  * The old interface functions have been converted to macros
@@ -468,17 +468,19 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
  break;

  /*
- * If the attribute number is 0, then we are supposed to return
- * the entire tuple as a row-type Datum. (Using zero for this
- * purpose is unclean since it risks confusion with "invalid attr"
- * result codes, but it's not worth changing now.)
+ * If the attribute number is 0, then we are supposed to
+ * return the entire tuple as a row-type Datum. (Using zero
+ * for this purpose is unclean since it risks confusion with
+ * "invalid attr" result codes, but it's not worth changing
+ * now.)
  *
- * We have to make a copy of the tuple so we can safely insert the
- * Datum overhead fields, which are not set in on-disk tuples.
+ * We have to make a copy of the tuple so we can safely insert
+ * the Datum overhead fields, which are not set in on-disk
+ * tuples.
  */
  case InvalidAttrNumber:
  {
- HeapTupleHeader dtup;
+ HeapTupleHeader dtup;

  dtup = (HeapTupleHeader) palloc(tup->t_len);
  memcpy((char *) dtup, (char *) tup->t_data, tup->t_len);
@@ -555,7 +557,7 @@ heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest)
 * construct a tuple from the given values[] and nulls[] arrays
 *
 * Null attributes are indicated by a 'n' in the appropriate byte
- * of nulls[]. Non-null attributes are indicated by a ' ' (space).
+ * of nulls[]. Non-null attributes are indicated by a ' ' (space).
 * ----------------
 */
 HeapTuple
@@ -580,7 +582,7 @@ heap_formtuple(TupleDesc tupleDescriptor,

  /*
  * Check for nulls and embedded tuples; expand any toasted attributes
- * in embedded tuples. This preserves the invariant that toasting can
+ * in embedded tuples. This preserves the invariant that toasting can
  * only go one level deep.
  *
  * We can skip calling toast_flatten_tuple_attribute() if the attribute
@@ -620,7 +622,7 @@ heap_formtuple(TupleDesc tupleDescriptor,
  len += ComputeDataSize(tupleDescriptor, values, nulls);

  /*
- * Allocate and zero the space needed. Note that the tuple body and
+ * Allocate and zero the space needed. Note that the tuple body and
  * HeapTupleData management structure are allocated in one chunk.
  */
  tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + len);
@@ -683,9 +685,9 @@ heap_modifytuple(HeapTuple tuple,
  * allocate and fill values and nulls arrays from either the tuple or
  * the repl information, as appropriate.
  *
- * NOTE: it's debatable whether to use heap_deformtuple() here or
- * just heap_getattr() only the non-replaced colums. The latter could
- * win if there are many replaced columns and few non-replaced ones.
+ * NOTE: it's debatable whether to use heap_deformtuple() here or just
+ * heap_getattr() only the non-replaced colums. The latter could win
+ * if there are many replaced columns and few non-replaced ones.
  * However, heap_deformtuple costs only O(N) while the heap_getattr
  * way would cost O(N^2) if there are many non-replaced columns, so it
  * seems better to err on the side of linear cost.
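The O(N)-versus-O(N^2) argument in the hunk above is easy to see concretely. Below is a minimal standalone C sketch using a toy length-prefixed record format (not PostgreSQL's tuple layout): deforming walks the record once, while a getattr-style accessor must re-walk from the start on every call, so fetching all N attributes that way is quadratic.

    #include <stdio.h>

    /* Toy tuple: N variable-width fields, each prefixed by a 1-byte length. */
    #define NATTS 5

    static const unsigned char tuple[] = {
        3, 'f', 'o', 'o', 5, 'h', 'e', 'l', 'l', 'o',
        1, 'x', 4, 't', 'e', 's', 't', 2, 'o', 'k'
    };

    /* O(N): walk the tuple once, remembering the running offset. */
    static void deform_all(const unsigned char *tp, int natts,
                           const unsigned char *vals[], int lens[])
    {
        int off = 0;
        for (int i = 0; i < natts; i++)
        {
            lens[i] = tp[off++];
            vals[i] = tp + off;
            off += lens[i];
        }
    }

    /* O(N) per call: must re-walk from the start, so fetching every
     * attribute this way costs O(N^2) overall. */
    static const unsigned char *getattr(const unsigned char *tp,
                                        int attnum, int *len)
    {
        int off = 0;
        for (int i = 0; i < attnum; i++)
            off += 1 + tp[off];
        *len = tp[off];
        return tp + off + 1;
    }

    int main(void)
    {
        const unsigned char *vals[NATTS];
        int lens[NATTS];
        int len;

        deform_all(tuple, NATTS, vals, lens);   /* one pass */
        for (int i = 0; i < NATTS; i++)
            printf("att %d: %.*s\n", i + 1, lens[i], vals[i]);

        /* re-walks the four preceding fields just to reach att 5 */
        const unsigned char *v = getattr(tuple, 4, &len);
        printf("att 5 via getattr: %.*s\n", len, v);
        return 0;
    }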
@@ -763,10 +765,11 @@ heap_deformtuple(HeapTuple tuple,
  bool slow = false; /* can we use/set attcacheoff? */

  natts = tup->t_natts;

  /*
- * In inheritance situations, it is possible that the given tuple actually
- * has more fields than the caller is expecting. Don't run off the end
- * of the caller's arrays.
+ * In inheritance situations, it is possible that the given tuple
+ * actually has more fields than the caller is expecting. Don't run
+ * off the end of the caller's arrays.
  */
  natts = Min(natts, tdesc_natts);

@@ -787,9 +790,7 @@ heap_deformtuple(HeapTuple tuple,
  nulls[attnum] = ' ';

  if (!slow && att[attnum]->attcacheoff >= 0)
- {
  off = att[attnum]->attcacheoff;
- }
  else
  {
  off = att_align(off, att[attnum]->attalign);
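The attcacheoff test in this hunk is an offset cache: once the byte offset of a fixed-width attribute has been computed, later lookups can skip the alignment arithmetic entirely. A standalone sketch of the idea, with toy descriptors rather than PostgreSQL's structs; as in the real code, the cache is only valid while offsets stay fixed, which is what the "slow" flag guards.

    #include <stdio.h>

    /* Toy attribute descriptor: fixed width plus a cached offset
     * (illustrative only). */
    typedef struct
    {
        int width;      /* fixed size of this attribute */
        int align;      /* required alignment (power of two) */
        int cacheoff;   /* cached byte offset, or -1 if unknown */
    } ToyAtt;

    static int align_up(int off, int align)
    {
        return (off + align - 1) & ~(align - 1);
    }

    int main(void)
    {
        ToyAtt att[] = { {4, 4, -1}, {1, 1, -1}, {8, 8, -1}, {2, 2, -1} };
        int natts = 4;

        /* First pass: do the alignment arithmetic and fill the cache. */
        int off = 0;
        for (int i = 0; i < natts; i++)
        {
            off = align_up(off, att[i].align);
            att[i].cacheoff = off;      /* remember for later calls */
            off += att[i].width;
        }

        /* Later passes skip the arithmetic and just read the cache. */
        for (int i = 0; i < natts; i++)
            printf("att %d at cached offset %d\n", i + 1, att[i].cacheoff);
        return 0;
    }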
@@ -807,8 +808,8 @@ heap_deformtuple(HeapTuple tuple,
  }

  /*
- * If tuple doesn't have all the atts indicated by tupleDesc, read
- * the rest as null
+ * If tuple doesn't have all the atts indicated by tupleDesc, read the
+ * rest as null
  */
  for (; attnum < tdesc_natts; attnum++)
  {
--- a/src/backend/access/common/indextuple.c
+++ b/src/backend/access/common/indextuple.c
@@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/common/indextuple.c,v 1.70 2004/08/29 04:12:17 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/common/indextuple.c,v 1.71 2004/08/29 05:06:39 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -162,9 +162,9 @@ index_formtuple(TupleDesc tupleDescriptor,
  if ((size & INDEX_SIZE_MASK) != size)
  ereport(ERROR,
  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("index row requires %lu bytes, maximum size is %lu",
- (unsigned long) size,
- (unsigned long) INDEX_SIZE_MASK)));
+ errmsg("index row requires %lu bytes, maximum size is %lu",
+ (unsigned long) size,
+ (unsigned long) INDEX_SIZE_MASK)));

  infomask |= size;
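The size test above is a bitmask capacity check: a value fits in the bits reserved for it exactly when masking it changes nothing. A minimal sketch with an illustrative 13-bit mask (the real INDEX_SIZE_MASK value differs):

    #include <stdio.h>

    /* A size fits in the available bits iff masking is a no-op. */
    #define TOY_SIZE_MASK 0x1FFF    /* 13 bits: sizes 0..8191 (illustrative) */

    static int fits(unsigned size)
    {
        return (size & TOY_SIZE_MASK) == size;
    }

    int main(void)
    {
        printf("8191 fits: %d\n", fits(8191));  /* 1: uses all mask bits */
        printf("8192 fits: %d\n", fits(8192));  /* 0: would need a 14th bit */
        return 0;
    }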
--- a/src/backend/access/common/printtup.c
+++ b/src/backend/access/common/printtup.c
@@ -9,7 +9,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/common/printtup.c,v 1.84 2004/08/29 04:12:17 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/common/printtup.c,v 1.85 2004/08/29 05:06:39 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -356,7 +356,7 @@ printtup(HeapTuple tuple, TupleDesc typeinfo, DestReceiver *self)

  outputstr = DatumGetCString(FunctionCall3(&thisState->finfo,
  attr,
- ObjectIdGetDatum(thisState->typioparam),
+ ObjectIdGetDatum(thisState->typioparam),
  Int32GetDatum(typeinfo->attrs[i]->atttypmod)));
  pq_sendcountedtext(&buf, outputstr, strlen(outputstr), false);
  pfree(outputstr);
@@ -368,7 +368,7 @@ printtup(HeapTuple tuple, TupleDesc typeinfo, DestReceiver *self)

  outputbytes = DatumGetByteaP(FunctionCall2(&thisState->finfo,
  attr,
- ObjectIdGetDatum(thisState->typioparam)));
+ ObjectIdGetDatum(thisState->typioparam)));
  /* We assume the result will not have been toasted */
  pq_sendint(&buf, VARSIZE(outputbytes) - VARHDRSZ, 4);
  pq_sendbytes(&buf, VARDATA(outputbytes),
@@ -458,7 +458,7 @@ printtup_20(HeapTuple tuple, TupleDesc typeinfo, DestReceiver *self)

  outputstr = DatumGetCString(FunctionCall3(&thisState->finfo,
  attr,
- ObjectIdGetDatum(thisState->typioparam),
+ ObjectIdGetDatum(thisState->typioparam),
  Int32GetDatum(typeinfo->attrs[i]->atttypmod)));
  pq_sendcountedtext(&buf, outputstr, strlen(outputstr), true);
  pfree(outputstr);
@@ -579,7 +579,7 @@ debugtup(HeapTuple tuple, TupleDesc typeinfo, DestReceiver *self)

  value = DatumGetCString(OidFunctionCall3(typoutput,
  attr,
- ObjectIdGetDatum(typioparam),
+ ObjectIdGetDatum(typioparam),
  Int32GetDatum(typeinfo->attrs[i]->atttypmod)));

  printatt((unsigned) i + 1, typeinfo->attrs[i], value);
@@ -672,7 +672,7 @@ printtup_internal_20(HeapTuple tuple, TupleDesc typeinfo, DestReceiver *self)

  outputbytes = DatumGetByteaP(FunctionCall2(&thisState->finfo,
  attr,
- ObjectIdGetDatum(thisState->typioparam)));
+ ObjectIdGetDatum(thisState->typioparam)));
  /* We assume the result will not have been toasted */
  pq_sendint(&buf, VARSIZE(outputbytes) - VARHDRSZ, 4);
  pq_sendbytes(&buf, VARDATA(outputbytes),
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/common/tupdesc.c,v 1.105 2004/08/29 04:12:17 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/common/tupdesc.c,v 1.106 2004/08/29 05:06:39 momjian Exp $
 *
 * NOTES
 * some of the executor utility code such as "ExecTypeFromTL" should be
@@ -52,8 +52,8 @@ CreateTemplateTupleDesc(int natts, bool hasoid)

  /*
  * Allocate enough memory for the tuple descriptor, and zero the
- * attrs[] array since TupleDescInitEntry assumes that the array
- * is filled with NULL pointers.
+ * attrs[] array since TupleDescInitEntry assumes that the array is
+ * filled with NULL pointers.
  */
  desc = (TupleDesc) palloc(sizeof(struct tupleDesc));

@@ -420,8 +420,8 @@ TupleDescInitEntry(TupleDesc desc,

  /*
  * Note: attributeName can be NULL, because the planner doesn't always
- * fill in valid resname values in targetlists, particularly for resjunk
- * attributes.
+ * fill in valid resname values in targetlists, particularly for
+ * resjunk attributes.
  */
  if (attributeName != NULL)
  namestrcpy(&(att->attname), attributeName);
@@ -464,7 +464,7 @@ TupleDescInitEntry(TupleDesc desc,
 * Given a relation schema (list of ColumnDef nodes), build a TupleDesc.
 *
 * Note: the default assumption is no OIDs; caller may modify the returned
- * TupleDesc if it wants OIDs. Also, tdtypeid will need to be filled in
+ * TupleDesc if it wants OIDs. Also, tdtypeid will need to be filled in
 * later on.
 */
 TupleDesc
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -8,7 +8,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.110 2004/08/29 04:12:17 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.111 2004/08/29 05:06:40 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -667,7 +667,7 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
  Datum attr[INDEX_MAX_KEYS];
  bool whatfree[INDEX_MAX_KEYS];
  char isnull[INDEX_MAX_KEYS];
- GistEntryVector *evec;
+ GistEntryVector *evec;
  Datum datum;
  int datumsize,
  i,
@@ -715,8 +715,8 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
  {
  evec->n = 2;
  gistentryinit(evec->vector[1],
- evec->vector[0].key, r, NULL,
- (OffsetNumber) 0, evec->vector[0].bytes, FALSE);
+ evec->vector[0].key, r, NULL,
+ (OffsetNumber) 0, evec->vector[0].bytes, FALSE);

  }
  else
@@ -763,7 +763,7 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
 static IndexTuple
 gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
 {
- GistEntryVector *evec;
+ GistEntryVector *evec;
  Datum datum;
  int datumsize;
  bool result,
@@ -879,7 +879,7 @@ gistunionsubkey(Relation r, GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITV
  int len,
  *attrsize;
  OffsetNumber *entries;
- GistEntryVector *evec;
+ GistEntryVector *evec;
  Datum datum;
  int datumsize;
  int reallen;
@@ -940,8 +940,8 @@ gistunionsubkey(Relation r, GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITV
  else
  {
  /*
- * evec->vector[0].bytes may be not
- * defined, so form union with itself
+ * evec->vector[0].bytes may be not defined, so form union
+ * with itself
  */
  if (reallen == 1)
  {
@@ -1056,7 +1056,7 @@ gistadjsubkey(Relation r,
  *ev1p;
  float lpenalty,
  rpenalty;
- GistEntryVector *evec;
+ GistEntryVector *evec;
  int datumsize;
  bool isnull[INDEX_MAX_KEYS];
  int i,
@@ -1222,7 +1222,7 @@ gistSplit(Relation r,
  rbknum;
  GISTPageOpaque opaque;
  GIST_SPLITVEC v;
- GistEntryVector *entryvec;
+ GistEntryVector *entryvec;
  bool *decompvec;
  int i,
  j,
--- a/src/backend/access/gist/gistget.c
+++ b/src/backend/access/gist/gistget.c
@@ -8,7 +8,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.41 2004/08/29 04:12:17 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.42 2004/08/29 05:06:40 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -250,9 +250,10 @@ gistindex_keytest(IndexTuple tuple,
  FALSE, isNull);

  /*
- * Call the Consistent function to evaluate the test. The arguments
- * are the index datum (as a GISTENTRY*), the comparison datum, and
- * the comparison operator's strategy number and subtype from pg_amop.
+ * Call the Consistent function to evaluate the test. The
+ * arguments are the index datum (as a GISTENTRY*), the comparison
+ * datum, and the comparison operator's strategy number and
+ * subtype from pg_amop.
  *
  * (Presently there's no need to pass the subtype since it'll always
  * be zero, but might as well pass it for possible future use.)
--- a/src/backend/access/gist/gistscan.c
+++ b/src/backend/access/gist/gistscan.c
@@ -8,7 +8,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.54 2004/08/29 04:12:17 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.55 2004/08/29 05:06:40 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -115,9 +115,7 @@ gistrescan(PG_FUNCTION_ARGS)
  * the sk_subtype field.
  */
  for (i = 0; i < s->numberOfKeys; i++)
- {
  s->keyData[i].sk_func = p->giststate->consistentFn[s->keyData[i].sk_attno - 1];
- }
  }

  PG_RETURN_VOID();
@@ -266,9 +264,9 @@ ReleaseResources_gist(void)
  GISTScanList next;

  /*
- * Note: this should be a no-op during normal query shutdown.
- * However, in an abort situation ExecutorEnd is not called and so
- * there may be open index scans to clean up.
+ * Note: this should be a no-op during normal query shutdown. However,
+ * in an abort situation ExecutorEnd is not called and so there may be
+ * open index scans to clean up.
  */
  prev = NULL;
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.72 2004/08/29 04:12:17 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.73 2004/08/29 05:06:40 momjian Exp $
 *
 * NOTES
 * This file contains only the public interface routines.
@@ -210,8 +210,8 @@ hashgettuple(PG_FUNCTION_ARGS)
  bool res;

  /*
- * We hold pin but not lock on current buffer while outside the hash AM.
- * Reacquire the read lock here.
+ * We hold pin but not lock on current buffer while outside the hash
+ * AM. Reacquire the read lock here.
  */
  if (BufferIsValid(so->hashso_curbuf))
  _hash_chgbufaccess(rel, so->hashso_curbuf, HASH_NOLOCK, HASH_READ);
@@ -470,7 +470,7 @@ hashbulkdelete(PG_FUNCTION_ARGS)
  /*
  * Read the metapage to fetch original bucket and tuple counts. Also,
  * we keep a copy of the last-seen metapage so that we can use its
- * hashm_spares[] values to compute bucket page addresses. This is a
+ * hashm_spares[] values to compute bucket page addresses. This is a
  * bit hokey but perfectly safe, since the interesting entries in the
  * spares array cannot change under us; and it beats rereading the
  * metapage for each bucket.
@@ -532,7 +532,7 @@ loop_top:
  ItemPointer htup;

  hitem = (HashItem) PageGetItem(page,
- PageGetItemId(page, offno));
+ PageGetItemId(page, offno));
  htup = &(hitem->hash_itup.t_tid);
  if (callback(htup, callback_state))
  {
@@ -595,8 +595,8 @@ loop_top:
  orig_ntuples == metap->hashm_ntuples)
  {
  /*
- * No one has split or inserted anything since start of scan,
- * so believe our count as gospel.
+ * No one has split or inserted anything since start of scan, so
+ * believe our count as gospel.
  */
  metap->hashm_ntuples = num_index_tuples;
  }
@@ -604,7 +604,7 @@ loop_top:
  {
  /*
  * Otherwise, our count is untrustworthy since we may have
- * double-scanned tuples in split buckets. Proceed by
+ * double-scanned tuples in split buckets. Proceed by
  * dead-reckoning.
  */
  if (metap->hashm_ntuples > tuples_removed)
--- a/src/backend/access/hash/hashinsert.c
+++ b/src/backend/access/hash/hashinsert.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.33 2004/08/29 04:12:18 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.34 2004/08/29 05:06:40 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -20,7 +20,7 @@


 static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf,
- Size itemsize, HashItem hitem);
+ Size itemsize, HashItem hitem);


 /*
@@ -81,7 +81,7 @@ _hash_doinsert(Relation rel, HashItem hitem)

  /*
  * Check whether the item can fit on a hash page at all. (Eventually,
- * we ought to try to apply TOAST methods if not.) Note that at this
+ * we ought to try to apply TOAST methods if not.) Note that at this
  * point, itemsz doesn't include the ItemId.
  */
  if (itemsz > HashMaxItemSize((Page) metap))
@@ -105,7 +105,8 @@ _hash_doinsert(Relation rel, HashItem hitem)
  _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);

  /*
- * Acquire share lock on target bucket; then we can release split lock.
+ * Acquire share lock on target bucket; then we can release split
+ * lock.
  */
  _hash_getlock(rel, blkno, HASH_SHARE);

@@ -124,7 +125,7 @@ _hash_doinsert(Relation rel, HashItem hitem)
  /*
  * no space on this page; check for an overflow page
  */
- BlockNumber nextblkno = pageopaque->hasho_nextblkno;
+ BlockNumber nextblkno = pageopaque->hasho_nextblkno;

  if (BlockNumberIsValid(nextblkno))
  {
@@ -169,8 +170,8 @@ _hash_doinsert(Relation rel, HashItem hitem)
  _hash_droplock(rel, blkno, HASH_SHARE);

  /*
- * Write-lock the metapage so we can increment the tuple count.
- * After incrementing it, check to see if it's time for a split.
+ * Write-lock the metapage so we can increment the tuple count. After
+ * incrementing it, check to see if it's time for a split.
  */
  _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.43 2004/08/29 04:12:18 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.44 2004/08/29 05:06:40 momjian Exp $
 *
 * NOTES
 * Overflow pages look like ordinary relation pages.
@@ -41,11 +41,11 @@ bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
  for (i = 1;
  i < splitnum && ovflbitnum > metap->hashm_spares[i];
  i++)
- /* loop */ ;
+ /* loop */ ;

  /*
- * Convert to absolute page number by adding the number of bucket pages
- * that exist before this split point.
+ * Convert to absolute page number by adding the number of bucket
+ * pages that exist before this split point.
  */
  return (BlockNumber) ((1 << i) + ovflbitnum);
 }
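A standalone rendering of bitno_to_blkno's arithmetic, with toy spares[] values: bucket pages arrive in batches of 2^i per split point, so an overflow page's "bit number" becomes an absolute block number by adding the 2^i bucket pages that precede its split point.

    #include <stdio.h>

    int main(void)
    {
        /* cumulative overflow pages allotted before each split point
         * (illustrative values, not a real metapage) */
        unsigned spares[] = {0, 1, 3, 6};
        unsigned splitnum = 4;
        unsigned ovflbitnum = 4;        /* 4th overflow bit overall */
        unsigned i;

        /* same loop shape as the hunk above: find the split point
         * this overflow page belongs to */
        for (i = 1; i < splitnum && ovflbitnum > spares[i]; i++)
            /* loop */ ;

        /* i == 3 here, so the block number is 2^3 + 4 = 12 */
        printf("block number = %u\n", (1u << i) + ovflbitnum);
        return 0;
    }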
@@ -79,7 +79,7 @@ blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
 *
 * Add an overflow page to the bucket whose last page is pointed to by 'buf'.
 *
- * On entry, the caller must hold a pin but no lock on 'buf'. The pin is
+ * On entry, the caller must hold a pin but no lock on 'buf'. The pin is
 * dropped before exiting (we assume the caller is not interested in 'buf'
 * anymore). The returned overflow page will be pinned and write-locked;
 * it is guaranteed to be empty.
@@ -88,12 +88,12 @@ blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
 * That buffer is returned in the same state.
 *
 * The caller must hold at least share lock on the bucket, to ensure that
- * no one else tries to compact the bucket meanwhile. This guarantees that
+ * no one else tries to compact the bucket meanwhile. This guarantees that
 * 'buf' won't stop being part of the bucket while it's unlocked.
 *
 * NB: since this could be executed concurrently by multiple processes,
 * one should not assume that the returned overflow page will be the
- * immediate successor of the originally passed 'buf'. Additional overflow
+ * immediate successor of the originally passed 'buf'. Additional overflow
 * pages might have been added to the bucket chain in between.
 */
 Buffer
@@ -197,7 +197,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
  /* outer loop iterates once per bitmap page */
  for (;;)
  {
- BlockNumber mapblkno;
+ BlockNumber mapblkno;
  Page mappage;
  uint32 last_inpage;

@@ -274,9 +274,9 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
  blkno = bitno_to_blkno(metap, bit);

  /*
- * Adjust hashm_firstfree to avoid redundant searches. But don't
- * risk changing it if someone moved it while we were searching
- * bitmap pages.
+ * Adjust hashm_firstfree to avoid redundant searches. But don't risk
+ * changing it if someone moved it while we were searching bitmap
+ * pages.
  */
  if (metap->hashm_firstfree == orig_firstfree)
  metap->hashm_firstfree = bit + 1;
@@ -304,9 +304,9 @@ found:
  blkno = bitno_to_blkno(metap, bit);

  /*
- * Adjust hashm_firstfree to avoid redundant searches. But don't
- * risk changing it if someone moved it while we were searching
- * bitmap pages.
+ * Adjust hashm_firstfree to avoid redundant searches. But don't risk
+ * changing it if someone moved it while we were searching bitmap
+ * pages.
  */
  if (metap->hashm_firstfree == orig_firstfree)
  {
@@ -381,7 +381,7 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
  Bucket bucket;

  /* Get information from the doomed page */
- ovflblkno = BufferGetBlockNumber(ovflbuf);
+ ovflblkno = BufferGetBlockNumber(ovflbuf);
  ovflpage = BufferGetPage(ovflbuf);
  _hash_checkpage(rel, ovflpage, LH_OVERFLOW_PAGE);
  ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
@@ -396,7 +396,7 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
  /*
  * Fix up the bucket chain. this is a doubly-linked list, so we must
  * fix up the bucket chain members behind and ahead of the overflow
- * page being deleted. No concurrency issues since we hold exclusive
+ * page being deleted. No concurrency issues since we hold exclusive
  * lock on the entire bucket.
  */
  if (BlockNumberIsValid(prevblkno))
@@ -488,7 +488,8 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)

  /*
  * It is okay to write-lock the new bitmap page while holding metapage
- * write lock, because no one else could be contending for the new page.
+ * write lock, because no one else could be contending for the new
+ * page.
  *
  * There is some loss of concurrency in possibly doing I/O for the new
  * page while holding the metapage lock, but this path is taken so
@@ -654,8 +655,8 @@ _hash_squeezebucket(Relation rel,

  /*
  * delete the tuple from the "read" page. PageIndexTupleDelete
- * repacks the ItemId array, so 'roffnum' will be "advanced" to
- * the "next" ItemId.
+ * repacks the ItemId array, so 'roffnum' will be "advanced"
+ * to the "next" ItemId.
  */
  PageIndexTupleDelete(rpage, roffnum);
  }
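The comment about roffnum being "advanced" describes a general pattern: deleting from a packed array shifts later entries down, so the loop must not advance its index after a delete. A minimal standalone C illustration:

    #include <stdio.h>
    #include <string.h>

    /* Remove a[i] from a packed array, shifting later entries down. */
    static void delete_at(int *a, int *n, int i)
    {
        memmove(&a[i], &a[i + 1], (*n - i - 1) * sizeof(int));
        (*n)--;
    }

    int main(void)
    {
        int a[] = {10, 20, 30, 40, 50};
        int n = 5;
        int i = 0;

        while (i < n)
        {
            if (a[i] % 20 == 0)
                delete_at(a, &n, i);    /* stay put: a[i] is now the next item */
            else
                i++;                    /* advance only if nothing was removed */
        }

        for (int k = 0; k < n; k++)
            printf("%d ", a[k]);        /* prints: 10 30 50 */
        printf("\n");
        return 0;
    }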
@@ -667,8 +668,9 @@ _hash_squeezebucket(Relation rel,
  * Tricky point here: if our read and write pages are adjacent in the
  * bucket chain, our write lock on wbuf will conflict with
  * _hash_freeovflpage's attempt to update the sibling links of the
- * removed page. However, in that case we are done anyway, so we can
- * simply drop the write lock before calling _hash_freeovflpage.
+ * removed page. However, in that case we are done anyway, so we
+ * can simply drop the write lock before calling
+ * _hash_freeovflpage.
  */
  if (PageIsEmpty(rpage))
  {
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.45 2004/08/29 04:12:18 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.46 2004/08/29 05:06:40 momjian Exp $
 *
 * NOTES
 * Postgres hash pages look like ordinary relation pages. The opaque
@@ -35,11 +35,11 @@


 static void _hash_splitbucket(Relation rel, Buffer metabuf,
- Bucket obucket, Bucket nbucket,
- BlockNumber start_oblkno,
- BlockNumber start_nblkno,
- uint32 maxbucket,
- uint32 highmask, uint32 lowmask);
+ Bucket obucket, Bucket nbucket,
+ BlockNumber start_oblkno,
+ BlockNumber start_nblkno,
+ uint32 maxbucket,
+ uint32 highmask, uint32 lowmask);


 /*
@@ -47,7 +47,7 @@ static void _hash_splitbucket(Relation rel, Buffer metabuf,
 * of the locking rules). However, we can skip taking lmgr locks when the
 * index is local to the current backend (ie, either temp or new in the
 * current transaction). No one else can see it, so there's no reason to
- * take locks. We still take buffer-level locks, but not lmgr locks.
+ * take locks. We still take buffer-level locks, but not lmgr locks.
 */
 #define USELOCKING(rel) (!RELATION_IS_LOCAL(rel))

@@ -239,13 +239,13 @@ _hash_metapinit(Relation rel)
  RelationGetRelationName(rel));

  /*
- * Determine the target fill factor (tuples per bucket) for this index.
- * The idea is to make the fill factor correspond to pages about 3/4ths
- * full. We can compute it exactly if the index datatype is fixed-width,
- * but for var-width there's some guessing involved.
+ * Determine the target fill factor (tuples per bucket) for this
+ * index. The idea is to make the fill factor correspond to pages
+ * about 3/4ths full. We can compute it exactly if the index datatype
+ * is fixed-width, but for var-width there's some guessing involved.
  */
  data_width = get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid,
- RelationGetDescr(rel)->attrs[0]->atttypmod);
+ RelationGetDescr(rel)->attrs[0]->atttypmod);
  item_width = MAXALIGN(sizeof(HashItemData)) + MAXALIGN(data_width) +
  sizeof(ItemIdData); /* include the line pointer */
  ffactor = (BLCKSZ * 3 / 4) / item_width;
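The fill-factor line at the end of this hunk is plain arithmetic; a worked version with illustrative sizes (the real header and line-pointer sizes depend on the platform and build):

    #include <stdio.h>

    /* Target ~3/4-full pages: tuples per bucket is 3/4 of the page
     * divided by the per-item footprint (data + item header + line
     * pointer).  All constants here are examples. */
    int main(void)
    {
        int blcksz = 8192;      /* page size */
        int data_width = 4;     /* e.g. a 4-byte key */
        int item_hdr = 8;       /* index-item header (illustrative) */
        int line_ptr = 4;       /* line pointer (illustrative) */

        int item_width = data_width + item_hdr + line_ptr;
        int ffactor = (blcksz * 3 / 4) / item_width;

        printf("item_width = %d, ffactor = %d tuples/bucket\n",
               item_width, ffactor);    /* 16 bytes -> 384 tuples */
        return 0;
    }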
@@ -288,8 +288,9 @@ _hash_metapinit(Relation rel)
  metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);

  /*
- * We initialize the index with two buckets, 0 and 1, occupying physical
- * blocks 1 and 2. The first freespace bitmap page is in block 3.
+ * We initialize the index with two buckets, 0 and 1, occupying
+ * physical blocks 1 and 2. The first freespace bitmap page is in
+ * block 3.
  */
  metap->hashm_maxbucket = metap->hashm_lowmask = 1; /* nbuckets - 1 */
  metap->hashm_highmask = 3; /* (nbuckets << 1) - 1 */
@@ -297,7 +298,7 @@ _hash_metapinit(Relation rel)
  MemSet((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares));
  MemSet((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));

- metap->hashm_spares[1] = 1; /* the first bitmap page is only spare */
+ metap->hashm_spares[1] = 1; /* the first bitmap page is only spare */
  metap->hashm_ovflpoint = 1;
  metap->hashm_firstfree = 0;

@@ -319,8 +320,8 @@ _hash_metapinit(Relation rel)
  }

  /*
- * Initialize first bitmap page. Can't do this until we
- * create the first two buckets, else smgr will complain.
+ * Initialize first bitmap page. Can't do this until we create the
+ * first two buckets, else smgr will complain.
  */
  _hash_initbitmap(rel, metap, 3);

@@ -362,17 +363,18 @@ _hash_expandtable(Relation rel, Buffer metabuf)
  uint32 lowmask;

  /*
- * Obtain the page-zero lock to assert the right to begin a split
- * (see README).
+ * Obtain the page-zero lock to assert the right to begin a split (see
+ * README).
  *
  * Note: deadlock should be impossible here. Our own backend could only
- * be holding bucket sharelocks due to stopped indexscans; those will not
- * block other holders of the page-zero lock, who are only interested in
- * acquiring bucket sharelocks themselves. Exclusive bucket locks are
- * only taken here and in hashbulkdelete, and neither of these operations
- * needs any additional locks to complete. (If, due to some flaw in this
- * reasoning, we manage to deadlock anyway, it's okay to error out; the
- * index will be left in a consistent state.)
+ * be holding bucket sharelocks due to stopped indexscans; those will
+ * not block other holders of the page-zero lock, who are only
+ * interested in acquiring bucket sharelocks themselves. Exclusive
+ * bucket locks are only taken here and in hashbulkdelete, and neither
+ * of these operations needs any additional locks to complete. (If,
+ * due to some flaw in this reasoning, we manage to deadlock anyway,
+ * it's okay to error out; the index will be left in a consistent
+ * state.)
  */
  _hash_getlock(rel, 0, HASH_EXCLUSIVE);

@@ -383,8 +385,8 @@ _hash_expandtable(Relation rel, Buffer metabuf)
  _hash_checkpage(rel, (Page) metap, LH_META_PAGE);

  /*
- * Check to see if split is still needed; someone else might have already
- * done one while we waited for the lock.
+ * Check to see if split is still needed; someone else might have
+ * already done one while we waited for the lock.
  *
  * Make sure this stays in sync with _hash_doinsert()
  */
@@ -394,16 +396,16 @@ _hash_expandtable(Relation rel, Buffer metabuf)

  /*
  * Determine which bucket is to be split, and attempt to lock the old
- * bucket. If we can't get the lock, give up.
+ * bucket. If we can't get the lock, give up.
  *
  * The lock protects us against other backends, but not against our own
  * backend. Must check for active scans separately.
  *
- * Ideally we would lock the new bucket too before proceeding, but if
- * we are about to cross a splitpoint then the BUCKET_TO_BLKNO mapping
+ * Ideally we would lock the new bucket too before proceeding, but if we
+ * are about to cross a splitpoint then the BUCKET_TO_BLKNO mapping
  * isn't correct yet. For simplicity we update the metapage first and
- * then lock. This should be okay because no one else should be trying
- * to lock the new bucket yet...
+ * then lock. This should be okay because no one else should be
+ * trying to lock the new bucket yet...
  */
  new_bucket = metap->hashm_maxbucket + 1;
  old_bucket = (new_bucket & metap->hashm_lowmask);
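The last two lines of the hunk are the core linear-hashing relationship: the next bucket to create is maxbucket + 1, and masking it with the previous table size's mask names the bucket it splits from. A standalone sketch, including the usual key-to-bucket mapping that falls back to the smaller mask for buckets that don't exist yet (this mirrors the scheme, not the backend's exact functions):

    #include <stdio.h>

    /* Map a hash value to a bucket: try the larger mask first, fall
     * back to the smaller one for buckets not yet split into being. */
    static unsigned key_to_bucket(unsigned hash, unsigned maxbucket,
                                  unsigned highmask, unsigned lowmask)
    {
        unsigned bucket = hash & highmask;

        if (bucket > maxbucket)
            bucket &= lowmask;      /* not split yet: use the old bucket */
        return bucket;
    }

    int main(void)
    {
        unsigned maxbucket = 5;     /* buckets 0..5 exist */
        unsigned lowmask = 3;       /* previous table size 4 */
        unsigned highmask = 7;      /* current table size 8 */

        unsigned new_bucket = maxbucket + 1;            /* 6 */
        unsigned old_bucket = new_bucket & lowmask;     /* 2, its split partner */
        printf("splitting bucket %u into %u\n", old_bucket, new_bucket);

        /* 14 & 7 = 6 > maxbucket, so the key falls back to bucket 2 */
        printf("hash 14 -> bucket %u\n",
               key_to_bucket(14, maxbucket, highmask, lowmask));
        return 0;
    }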
@@ -417,7 +419,8 @@ _hash_expandtable(Relation rel, Buffer metabuf)
  goto fail;

  /*
- * Okay to proceed with split. Update the metapage bucket mapping info.
+ * Okay to proceed with split. Update the metapage bucket mapping
+ * info.
  */
  metap->hashm_maxbucket = new_bucket;

@@ -431,11 +434,11 @@ _hash_expandtable(Relation rel, Buffer metabuf)
  /*
  * If the split point is increasing (hashm_maxbucket's log base 2
  * increases), we need to adjust the hashm_spares[] array and
- * hashm_ovflpoint so that future overflow pages will be created beyond
- * this new batch of bucket pages.
+ * hashm_ovflpoint so that future overflow pages will be created
+ * beyond this new batch of bucket pages.
  *
- * XXX should initialize new bucket pages to prevent out-of-order
- * page creation? Don't wanna do it right here though.
+ * XXX should initialize new bucket pages to prevent out-of-order page
+ * creation? Don't wanna do it right here though.
  */
  spare_ndx = _hash_log2(metap->hashm_maxbucket + 1);
  if (spare_ndx > metap->hashm_ovflpoint)
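_hash_log2 here computes the split point, i.e. the ceiling of log2 of the new bucket count. A minimal version of that computation (assumed semantics: the smallest i with 2^i >= num):

    #include <stdio.h>

    /* Smallest i such that (1 << i) >= num. */
    static unsigned log2_ceil(unsigned num)
    {
        unsigned i = 0;
        unsigned limit;

        for (limit = 1; limit < num; limit <<= 1)
            i++;
        return i;
    }

    int main(void)
    {
        /* After creating bucket 6, maxbucket + 1 = 7 gives split point 3;
         * overflow pages allocated from then on sit beyond block 2^3. */
        printf("log2_ceil(7) = %u\n", log2_ceil(7));   /* 3 */
        printf("log2_ceil(8) = %u\n", log2_ceil(8));   /* 3 */
        printf("log2_ceil(9) = %u\n", log2_ceil(9));   /* 4 */
        return 0;
    }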
@@ -456,9 +459,10 @@ _hash_expandtable(Relation rel, Buffer metabuf)
  /*
  * Copy bucket mapping info now; this saves re-accessing the meta page
  * inside _hash_splitbucket's inner loop. Note that once we drop the
- * split lock, other splits could begin, so these values might be out of
- * date before _hash_splitbucket finishes. That's okay, since all it
- * needs is to tell which of these two buckets to map hashkeys into.
+ * split lock, other splits could begin, so these values might be out
+ * of date before _hash_splitbucket finishes. That's okay, since all
+ * it needs is to tell which of these two buckets to map hashkeys
+ * into.
  */
  maxbucket = metap->hashm_maxbucket;
  highmask = metap->hashm_highmask;
@@ -539,8 +543,8 @@ _hash_splitbucket(Relation rel,

  /*
  * It should be okay to simultaneously write-lock pages from each
- * bucket, since no one else can be trying to acquire buffer lock
- * on pages of either bucket.
+ * bucket, since no one else can be trying to acquire buffer lock on
+ * pages of either bucket.
  */
  oblkno = start_oblkno;
  nblkno = start_nblkno;
@@ -562,9 +566,9 @@ _hash_splitbucket(Relation rel,
  nopaque->hasho_filler = HASHO_FILL;

  /*
- * Partition the tuples in the old bucket between the old bucket and the
- * new bucket, advancing along the old bucket's overflow bucket chain
- * and adding overflow pages to the new bucket as needed.
+ * Partition the tuples in the old bucket between the old bucket and
+ * the new bucket, advancing along the old bucket's overflow bucket
+ * chain and adding overflow pages to the new bucket as needed.
  */
  ooffnum = FirstOffsetNumber;
  omaxoffnum = PageGetMaxOffsetNumber(opage);
@@ -582,9 +586,10 @@ _hash_splitbucket(Relation rel,
  oblkno = oopaque->hasho_nextblkno;
  if (!BlockNumberIsValid(oblkno))
  break;

  /*
- * we ran out of tuples on this particular page, but we
- * have more overflow pages; advance to next page.
+ * we ran out of tuples on this particular page, but we have
+ * more overflow pages; advance to next page.
  */
  _hash_wrtbuf(rel, obuf);

@@ -600,8 +605,8 @@ _hash_splitbucket(Relation rel,
  /*
  * Re-hash the tuple to determine which bucket it now belongs in.
  *
- * It is annoying to call the hash function while holding locks,
- * but releasing and relocking the page for each tuple is unappealing
+ * It is annoying to call the hash function while holding locks, but
+ * releasing and relocking the page for each tuple is unappealing
  * too.
  */
  hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum));
@@ -666,10 +671,11 @@ _hash_splitbucket(Relation rel,
  }

  /*
- * We're at the end of the old bucket chain, so we're done partitioning
- * the tuples. Before quitting, call _hash_squeezebucket to ensure the
- * tuples remaining in the old bucket (including the overflow pages) are
- * packed as tightly as possible. The new bucket is already tight.
+ * We're at the end of the old bucket chain, so we're done
+ * partitioning the tuples. Before quitting, call _hash_squeezebucket
+ * to ensure the tuples remaining in the old bucket (including the
+ * overflow pages) are packed as tightly as possible. The new bucket
+ * is already tight.
  */
  _hash_wrtbuf(rel, obuf);
  _hash_wrtbuf(rel, nbuf);
--- a/src/backend/access/hash/hashscan.c
+++ b/src/backend/access/hash/hashscan.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashscan.c,v 1.36 2004/08/29 04:12:18 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashscan.c,v 1.37 2004/08/29 05:06:40 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -44,9 +44,9 @@ ReleaseResources_hash(void)
  HashScanList next;

  /*
- * Note: this should be a no-op during normal query shutdown.
- * However, in an abort situation ExecutorEnd is not called and so
- * there may be open index scans to clean up.
+ * Note: this should be a no-op during normal query shutdown. However,
+ * in an abort situation ExecutorEnd is not called and so there may be
+ * open index scans to clean up.
  */
  prev = NULL;
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.36 2004/08/29 04:12:18 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.37 2004/08/29 05:06:40 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -137,12 +137,13 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
  * We do not support hash scans with no index qualification, because
  * we would have to read the whole index rather than just one bucket.
  * That creates a whole raft of problems, since we haven't got a
- * practical way to lock all the buckets against splits or compactions.
+ * practical way to lock all the buckets against splits or
+ * compactions.
  */
  if (scan->numberOfKeys < 1)
  ereport(ERROR,
  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("hash indexes do not support whole-index scans")));
+ errmsg("hash indexes do not support whole-index scans")));

  /*
  * If the constant in the index qual is NULL, assume it cannot match
@@ -182,7 +183,8 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
  _hash_relbuf(rel, metabuf);

  /*
- * Acquire share lock on target bucket; then we can release split lock.
+ * Acquire share lock on target bucket; then we can release split
+ * lock.
  */
  _hash_getlock(rel, blkno, HASH_SHARE);

@@ -287,9 +289,8 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
  while (offnum > maxoff)
  {
  /*
- * either this page is empty
- * (maxoff == InvalidOffsetNumber)
- * or we ran off the end.
+ * either this page is empty (maxoff ==
+ * InvalidOffsetNumber) or we ran off the end.
  */
  _hash_readnext(rel, &buf, &page, &opaque);
  if (BufferIsValid(buf))
@@ -315,15 +316,12 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
  while (offnum < FirstOffsetNumber)
  {
  /*
- * either this page is empty
- * (offnum == InvalidOffsetNumber)
- * or we ran off the end.
+ * either this page is empty (offnum ==
+ * InvalidOffsetNumber) or we ran off the end.
  */
  _hash_readprev(rel, &buf, &page, &opaque);
  if (BufferIsValid(buf))
- {
  maxoff = offnum = PageGetMaxOffsetNumber(page);
- }
  else
  {
  /* end of bucket */
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashutil.c,v 1.39 2004/08/29 04:12:18 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashutil.c,v 1.40 2004/08/29 05:06:40 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -113,6 +113,7 @@ void
 _hash_checkpage(Relation rel, Page page, int flags)
 {
  Assert(page);
+
  /*
  * When checking the metapage, always verify magic number and version.
  */
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.172 2004/08/29 04:12:20 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.173 2004/08/29 05:06:40 momjian Exp $
 *
 *
 * INTERFACE ROUTINES
@@ -75,9 +75,9 @@ initscan(HeapScanDesc scan, ScanKey key)
  /*
  * Determine the number of blocks we have to scan.
  *
- * It is sufficient to do this once at scan start, since any tuples
- * added while the scan is in progress will be invisible to my
- * transaction anyway...
+ * It is sufficient to do this once at scan start, since any tuples added
+ * while the scan is in progress will be invisible to my transaction
+ * anyway...
  */
  scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);

@@ -1141,12 +1141,13 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
  tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
  HeapTupleHeaderSetXmin(tup->t_data, GetCurrentTransactionId());
  HeapTupleHeaderSetCmin(tup->t_data, cid);
- HeapTupleHeaderSetCmax(tup->t_data, 0); /* zero out Datum fields */
+ HeapTupleHeaderSetCmax(tup->t_data, 0); /* zero out Datum fields */
  tup->t_tableOid = relation->rd_id;

  /*
  * If the new tuple is too big for storage or contains already toasted
- * out-of-line attributes from some other relation, invoke the toaster.
+ * out-of-line attributes from some other relation, invoke the
+ * toaster.
  */
  if (HeapTupleHasExternal(tup) ||
  (MAXALIGN(tup->t_len) > TOAST_TUPLE_THRESHOLD))
@@ -1273,7 +1274,7 @@ simple_heap_insert(Relation relation, HeapTuple tup)
 */
 int
 heap_delete(Relation relation, ItemPointer tid,
- ItemPointer ctid, CommandId cid, Snapshot crosscheck, bool wait)
+ ItemPointer ctid, CommandId cid, Snapshot crosscheck, bool wait)
 {
  ItemId lp;
  HeapTupleData tp;
@@ -1404,9 +1405,9 @@ l1:

  /*
  * If the tuple has toasted out-of-line attributes, we need to delete
- * those items too. We have to do this before WriteBuffer because we need
- * to look at the contents of the tuple, but it's OK to release the
- * context lock on the buffer first.
+ * those items too. We have to do this before WriteBuffer because we
+ * need to look at the contents of the tuple, but it's OK to release
+ * the context lock on the buffer first.
  */
  if (HeapTupleHasExternal(&tp))
  heap_tuple_toast_attrs(relation, NULL, &tp);
@@ -1443,7 +1444,7 @@ simple_heap_delete(Relation relation, ItemPointer tid)
  result = heap_delete(relation, tid,
  &ctid,
  GetCurrentCommandId(), SnapshotAny,
- true /* wait for commit */);
+ true /* wait for commit */ );
  switch (result)
  {
  case HeapTupleSelfUpdated:
@@ -1490,7 +1491,7 @@ simple_heap_delete(Relation relation, ItemPointer tid)
 */
 int
 heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
- ItemPointer ctid, CommandId cid, Snapshot crosscheck, bool wait)
+ ItemPointer ctid, CommandId cid, Snapshot crosscheck, bool wait)
 {
  ItemId lp;
  HeapTupleData oldtup;
@@ -1804,7 +1805,7 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
  result = heap_update(relation, otid, tup,
  &ctid,
  GetCurrentCommandId(), SnapshotAny,
- true /* wait for commit */);
+ true /* wait for commit */ );
  switch (result)
  {
  case HeapTupleSelfUpdated:
@@ -2198,8 +2199,8 @@ heap_xlog_newpage(bool redo, XLogRecPtr lsn, XLogRecord *record)
  Page page;

  /*
- * Note: the NEWPAGE log record is used for both heaps and indexes,
- * so do not do anything that assumes we are touching a heap.
+ * Note: the NEWPAGE log record is used for both heaps and indexes, so
+ * do not do anything that assumes we are touching a heap.
  */

  if (!redo || (record->xl_info & XLR_BKP_BLOCK_1))
@@ -2668,7 +2669,7 @@ static void
 out_target(char *buf, xl_heaptid *target)
 {
  sprintf(buf + strlen(buf), "rel %u/%u/%u; tid %u/%u",
- target->node.spcNode, target->node.dbNode, target->node.relNode,
+ target->node.spcNode, target->node.dbNode, target->node.relNode,
  ItemPointerGetBlockNumber(&(target->tid)),
  ItemPointerGetOffsetNumber(&(target->tid)));
 }
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.44 2004/08/29 04:12:20 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.45 2004/08/29 05:06:40 momjian Exp $
 *
 *
 * INTERFACE ROUTINES
@@ -288,13 +288,13 @@ toast_delete(Relation rel, HeapTuple oldtup)
  /*
  * Get the tuple descriptor and break down the tuple into fields.
  *
- * NOTE: it's debatable whether to use heap_deformtuple() here or
- * just heap_getattr() only the varlena columns. The latter could
- * win if there are few varlena columns and many non-varlena ones.
- * However, heap_deformtuple costs only O(N) while the heap_getattr
- * way would cost O(N^2) if there are many varlena columns, so it
- * seems better to err on the side of linear cost. (We won't even
- * be here unless there's at least one varlena column, by the way.)
+ * NOTE: it's debatable whether to use heap_deformtuple() here or just
+ * heap_getattr() only the varlena columns. The latter could win if
+ * there are few varlena columns and many non-varlena ones. However,
+ * heap_deformtuple costs only O(N) while the heap_getattr way would
+ * cost O(N^2) if there are many varlena columns, so it seems better
+ * to err on the side of linear cost. (We won't even be here unless
+ * there's at least one varlena column, by the way.)
  */
  tupleDesc = rel->rd_att;
  att = tupleDesc->attrs;
@@ -311,7 +311,7 @@ toast_delete(Relation rel, HeapTuple oldtup)
  {
  if (att[i]->attlen == -1)
  {
- Datum value = toast_values[i];
+ Datum value = toast_values[i];

  if (toast_nulls[i] != 'n' && VARATT_IS_EXTERNAL(value))
  toast_delete_datum(rel, value);
@@ -791,7 +791,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
 *
 * If a Datum is of composite type, "flatten" it to contain no toasted fields.
 * This must be invoked on any potentially-composite field that is to be
- * inserted into a tuple. Doing this preserves the invariant that toasting
+ * inserted into a tuple. Doing this preserves the invariant that toasting
 * goes only one level deep in a tuple.
 * ----------
 */
@@ -1105,7 +1105,7 @@ toast_delete_datum(Relation rel, Datum value)
  ScanKeyInit(&toastkey,
  (AttrNumber) 1,
  BTEqualStrategyNumber, F_OIDEQ,
- ObjectIdGetDatum(attr->va_content.va_external.va_valueid));
+ ObjectIdGetDatum(attr->va_content.va_external.va_valueid));

  /*
  * Find the chunks by index
@@ -1176,7 +1176,7 @@ toast_fetch_datum(varattrib *attr)
  ScanKeyInit(&toastkey,
  (AttrNumber) 1,
  BTEqualStrategyNumber, F_OIDEQ,
- ObjectIdGetDatum(attr->va_content.va_external.va_valueid));
+ ObjectIdGetDatum(attr->va_content.va_external.va_valueid));

  /*
  * Read the chunks by index
@@ -1330,7 +1330,7 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length)
  ScanKeyInit(&toastkey[0],
  (AttrNumber) 1,
  BTEqualStrategyNumber, F_OIDEQ,
- ObjectIdGetDatum(attr->va_content.va_external.va_valueid));
+ ObjectIdGetDatum(attr->va_content.va_external.va_valueid));

  /*
  * Use equality condition for one chunk, a range condition otherwise:
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.115 2004/08/29 04:12:21 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.116 2004/08/29 05:06:40 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -200,26 +200,26 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel,
  * We can skip items that are marked killed.
  *
  * Formerly, we applied _bt_isequal() before checking the kill
- * flag, so as to fall out of the item loop as soon as possible.
- * However, in the presence of heavy update activity an index
- * may contain many killed items with the same key; running
- * _bt_isequal() on each killed item gets expensive. Furthermore
- * it is likely that the non-killed version of each key appears
- * first, so that we didn't actually get to exit any sooner anyway.
- * So now we just advance over killed items as quickly as we can.
- * We only apply _bt_isequal() when we get to a non-killed item or
- * the end of the page.
+ * flag, so as to fall out of the item loop as soon as
+ * possible. However, in the presence of heavy update activity
+ * an index may contain many killed items with the same key;
+ * running _bt_isequal() on each killed item gets expensive.
+ * Furthermore it is likely that the non-killed version of
+ * each key appears first, so that we didn't actually get to
+ * exit any sooner anyway. So now we just advance over killed
+ * items as quickly as we can. We only apply _bt_isequal()
+ * when we get to a non-killed item or the end of the page.
  */
  if (!ItemIdDeleted(curitemid))
  {
  /*
- * _bt_compare returns 0 for (1,NULL) and (1,NULL) - this's
- * how we handling NULLs - and so we must not use _bt_compare
- * in real comparison, but only for ordering/finding items on
- * pages. - vadim 03/24/97
+ * _bt_compare returns 0 for (1,NULL) and (1,NULL) -
+ * this's how we handling NULLs - and so we must not use
+ * _bt_compare in real comparison, but only for
+ * ordering/finding items on pages. - vadim 03/24/97
  */
  if (!_bt_isequal(itupdesc, page, offset, natts, itup_scankey))
- break; /* we're past all the equal tuples */
+ break; /* we're past all the equal tuples */

  /* okay, we gotta fetch the heap tuple ... */
  cbti = (BTItem) PageGetItem(page, curitemid);
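The reordering described in this hunk's comment, testing the cheap killed flag first and running the expensive equality check only on live items, is a general pattern. A standalone toy version (the Item struct is hypothetical and expensive_equal stands in for _bt_isequal-style work):

    #include <stdio.h>

    typedef struct
    {
        int killed;     /* cheap flag */
        int key;
    } Item;

    static int expensive_equal(int a, int b)
    {
        /* stands in for a costly comparison */
        return a == b;
    }

    int main(void)
    {
        Item items[] = { {1, 7}, {1, 7}, {1, 7}, {0, 7}, {0, 9} };
        int n = 5, checks = 0;

        for (int i = 0; i < n; i++)
        {
            if (items[i].killed)
                continue;           /* skip dead items without comparing */
            checks++;
            if (!expensive_equal(items[i].key, 7))
                break;              /* past all the equal items */
            printf("live duplicate at slot %d\n", i);
        }
        printf("expensive checks: %d (instead of %d)\n", checks, n);
        return 0;
    }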
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.79 2004/08/29 04:12:21 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.80 2004/08/29 05:06:40 momjian Exp $
 *
 * NOTES
 * Postgres btree pages look like ordinary relation pages. The opaque
@@ -276,8 +276,8 @@ _bt_getroot(Relation rel, int access)
  rootlevel = metad->btm_fastlevel;

  /*
- * We are done with the metapage; arrange to release it via
- * first _bt_relandgetbuf call
+ * We are done with the metapage; arrange to release it via first
+ * _bt_relandgetbuf call
  */
  rootbuf = metabuf;

@@ -368,8 +368,8 @@ _bt_gettrueroot(Relation rel)
  rootlevel = metad->btm_level;

  /*
- * We are done with the metapage; arrange to release it via
- * first _bt_relandgetbuf call
+ * We are done with the metapage; arrange to release it via first
+ * _bt_relandgetbuf call
  */
  rootbuf = metabuf;

@@ -433,21 +433,22 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
  * page could have been re-used between the time the last VACUUM
  * scanned it and the time the VACUUM made its FSM updates.)
  *
- * In fact, it's worse than that: we can't even assume that it's
- * safe to take a lock on the reported page. If somebody else
- * has a lock on it, or even worse our own caller does, we could
+ * In fact, it's worse than that: we can't even assume that it's safe
+ * to take a lock on the reported page. If somebody else has a
+ * lock on it, or even worse our own caller does, we could
  * deadlock. (The own-caller scenario is actually not improbable.
  * Consider an index on a serial or timestamp column. Nearly all
  * splits will be at the rightmost page, so it's entirely likely
- * that _bt_split will call us while holding a lock on the page most
- * recently acquired from FSM. A VACUUM running concurrently with
- * the previous split could well have placed that page back in FSM.)
+ * that _bt_split will call us while holding a lock on the page
+ * most recently acquired from FSM. A VACUUM running concurrently
+ * with the previous split could well have placed that page back
+ * in FSM.)
  *
  * To get around that, we ask for only a conditional lock on the
- * reported page. If we fail, then someone else is using the page,
- * and we may reasonably assume it's not free. (If we happen to be
- * wrong, the worst consequence is the page will be lost to use till
- * the next VACUUM, which is no big problem.)
+ * reported page. If we fail, then someone else is using the
+ * page, and we may reasonably assume it's not free. (If we
+ * happen to be wrong, the worst consequence is the page will be
+ * lost to use till the next VACUUM, which is no big problem.)
  */
  for (;;)
  {
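The conditional-lock idea in the _bt_getbuf comment can be sketched with any lock API that offers a try variant: on failure, assume the resource is in use and move on rather than risk deadlocking against our own caller. A pthreads toy (buffer locks are not pthreads mutexes; only the shape carries over):

    #include <stdio.h>
    #include <pthread.h>

    int main(void)
    {
        pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;

        pthread_mutex_lock(&page_lock);     /* e.g. held by our own caller */

        /* try, don't wait: failure means "page busy, not free" */
        if (pthread_mutex_trylock(&page_lock) == 0)
        {
            printf("got the page; reuse it\n");
            pthread_mutex_unlock(&page_lock);
        }
        else
            printf("page busy; assume it's not free and move on\n");

        pthread_mutex_unlock(&page_lock);
        return 0;
    }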
@@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.88 2004/08/29 04:12:21 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.89 2004/08/29 05:06:40 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -155,15 +155,16 @@ _bt_moveright(Relation rel,
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/*
|
||||
* When nextkey = false (normal case): if the scan key that brought us to
|
||||
* this page is > the high key stored on the page, then the page has split
|
||||
* and we need to move right. (If the scan key is equal to the high key,
|
||||
* we might or might not need to move right; have to scan the page first
|
||||
* anyway.)
|
||||
* When nextkey = false (normal case): if the scan key that brought us
|
||||
* to this page is > the high key stored on the page, then the page
|
||||
* has split and we need to move right. (If the scan key is equal to
|
||||
* the high key, we might or might not need to move right; have to
|
||||
* scan the page first anyway.)
|
||||
*
|
||||
* When nextkey = true: move right if the scan key is >= page's high key.
|
||||
*
|
||||
* The page could even have split more than once, so scan as far as needed.
|
||||
* The page could even have split more than once, so scan as far as
|
||||
* needed.
|
||||
*
|
||||
* We also have to move right if we followed a link that brought us to a
|
||||
* dead page.
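
Both nextkey cases collapse into one comparison via a cutoff value; roughly, as an illustrative paraphrase of the test (the helpers P_RIGHTMOST, P_IGNORE, P_HIKEY, and _bt_relandgetbuf are the backend's own, but this loop is simplified):

    cmpval = nextkey ? 0 : 1;   /* makes ">= high key" and "> high key" one test */
    while (!P_RIGHTMOST(opaque) &&
           (P_IGNORE(opaque) ||
            _bt_compare(rel, keysz, scankey, page, P_HIKEY) >= cmpval))
    {
        /* follow the right-link to the sibling page and re-examine it */
        buf = _bt_relandgetbuf(rel, buf, opaque->btpo_next, access);
        page = BufferGetPage(buf);
        opaque = (BTPageOpaque) PageGetSpecialPointer(page);
    }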
@@ -253,13 +254,11 @@ _bt_binsrch(Relation rel,
* Binary search to find the first key on the page >= scan key, or
* first key > scankey when nextkey is true.
*
* For nextkey=false (cmpval=1), the loop invariant is: all slots
* before 'low' are < scan key, all slots at or after 'high'
* are >= scan key.
* For nextkey=false (cmpval=1), the loop invariant is: all slots before
* 'low' are < scan key, all slots at or after 'high' are >= scan key.
*
* For nextkey=true (cmpval=0), the loop invariant is: all slots
* before 'low' are <= scan key, all slots at or after 'high'
* are > scan key.
* For nextkey=true (cmpval=0), the loop invariant is: all slots before
* 'low' are <= scan key, all slots at or after 'high' are > scan key.
*
* We can fall out when high == low.
*/
@@ -285,15 +284,15 @@ _bt_binsrch(Relation rel,
* At this point we have high == low, but be careful: they could point
* past the last slot on the page.
*
* On a leaf page, we always return the first key >= scan key (resp.
* > scan key), which could be the last slot + 1.
* On a leaf page, we always return the first key >= scan key (resp. >
* scan key), which could be the last slot + 1.
*/
if (P_ISLEAF(opaque))
return low;

/*
* On a non-leaf page, return the last key < scan key (resp. <= scan key).
* There must be one if _bt_compare() is playing by the rules.
* On a non-leaf page, return the last key < scan key (resp. <= scan
* key).  There must be one if _bt_compare() is playing by the rules.
*/
Assert(low > P_FIRSTDATAKEY(opaque));
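
Those invariants pin the loop down completely; a sketch of the search they describe (a paraphrase, with cmpval folding the two nextkey cases into a single comparison):

    low = P_FIRSTDATAKEY(opaque);
    high = PageGetMaxOffsetNumber(page);
    high++;                     /* establish the invariant: high is one past the last slot */
    cmpval = nextkey ? 0 : 1;   /* selects "> scan key" vs ">= scan key" behavior */
    while (high > low)
    {
        OffsetNumber mid = low + ((high - low) / 2);

        if (_bt_compare(rel, keysz, scankey, page, mid) >= cmpval)
            low = mid + 1;      /* mid and everything before it is on the "low" side */
        else
            high = mid;         /* mid is on the "high" side */
    }
    /* here high == low: the first slot >= (resp. >) scan key, possibly last + 1 */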

@@ -382,10 +381,10 @@ _bt_compare(Relation rel,
{
/*
* The sk_func needs to be passed the index value as left arg
* and the sk_argument as right arg (they might be of different
* types).  Since it is convenient for callers to think of
* _bt_compare as comparing the scankey to the index item,
* we have to flip the sign of the comparison result.
* and the sk_argument as right arg (they might be of
* different types).  Since it is convenient for callers to
* think of _bt_compare as comparing the scankey to the index
* item, we have to flip the sign of the comparison result.
*
* Note: curious-looking coding is to avoid overflow if
* comparison function returns INT_MIN.  There is no risk of
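
Negating INT_MIN is undefined behavior in C, so the flip has to normalize first. One overflow-safe formulation, shown for illustration (not necessarily the exact coding in the file):

    result = DatumGetInt32(FunctionCall2(&scankey->sk_func,
                                         datum, scankey->sk_argument));
    /* flip the sign without ever computing -INT_MIN */
    result = (result < 0) ? 1 : ((result > 0) ? -1 : 0);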
@@ -497,7 +496,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
bool goback;
bool continuescan;
ScanKey scankeys;
ScanKey *startKeys = NULL;
ScanKey *startKeys = NULL;
int keysCount = 0;
int i;
StrategyNumber strat_total;
@@ -521,7 +520,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* We want to identify the keys that can be used as starting boundaries;
* these are =, >, or >= keys for a forward scan or =, <, <= keys for
* a backwards scan.  We can use keys for multiple attributes so long as
* the prior attributes had only =, >= (resp. =, <=) keys.  Once we accept
* the prior attributes had only =, >= (resp. =, <=) keys. Once we accept
* a > or < boundary or find an attribute with no boundary (which can be
* thought of as the same as "> -infinity"), we can't use keys for any
* attributes to its right, because it would break our simplistic notion
@@ -554,13 +553,15 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
ScanKey cur;

startKeys = (ScanKey *) palloc(so->numberOfKeys * sizeof(ScanKey));

/*
* chosen is the so-far-chosen key for the current attribute, if any.
* We don't cast the decision in stone until we reach keys for the
* next attribute.
* chosen is the so-far-chosen key for the current attribute, if
* any.  We don't cast the decision in stone until we reach keys
* for the next attribute.
*/
curattr = 1;
chosen = NULL;

/*
* Loop iterates from 0 to numberOfKeys inclusive; we use the last
* pass to handle after-last-key processing.  Actual exit from the
@@ -578,8 +579,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (chosen == NULL)
break;
startKeys[keysCount++] = chosen;

/*
* Adjust strat_total, and quit if we have stored a > or < key.
* Adjust strat_total, and quit if we have stored a > or <
* key.
*/
strat = chosen->sk_strategy;
if (strat != BTEqualStrategyNumber)
@@ -589,11 +592,13 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
strat == BTLessStrategyNumber)
break;
}

/*
* Done if that was the last attribute.
*/
if (i >= so->numberOfKeys)
break;

/*
* Reset for next attr, which should be in sequence.
*/
@@ -646,8 +651,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
ScanKey cur = startKeys[i];

/*
* _bt_preprocess_keys disallows it, but it's place to add some code
* later
* _bt_preprocess_keys disallows it, but it's place to add some
* code later
*/
if (cur->sk_flags & SK_ISNULL)
{
@@ -656,10 +661,11 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
elog(ERROR, "btree doesn't support is(not)null, yet");
return false;
}

/*
* If scankey operator is of default subtype, we can use the
* cached comparison procedure; otherwise gotta look it up in
* the catalogs.
* cached comparison procedure; otherwise gotta look it up in the
* catalogs.
*/
if (cur->sk_subtype == InvalidOid)
{
@@ -695,43 +701,46 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)

/*
* Examine the selected initial-positioning strategy to determine
* exactly where we need to start the scan, and set flag variables
* to control the code below.
* exactly where we need to start the scan, and set flag variables to
* control the code below.
*
* If nextkey = false, _bt_search and _bt_binsrch will locate the
* first item >= scan key.  If nextkey = true, they will locate the
* first item > scan key.
* If nextkey = false, _bt_search and _bt_binsrch will locate the first
* item >= scan key.  If nextkey = true, they will locate the first
* item > scan key.
*
* If goback = true, we will then step back one item, while if
* goback = false, we will start the scan on the located item.
* If goback = true, we will then step back one item, while if goback =
* false, we will start the scan on the located item.
*
* it's yet other place to add some code later for is(not)null ...
*/
switch (strat_total)
{
case BTLessStrategyNumber:

/*
* Find first item >= scankey, then back up one to arrive at last
* item < scankey.  (Note: this positioning strategy is only used
* for a backward scan, so that is always the correct starting
* position.)
* Find first item >= scankey, then back up one to arrive at
* last item < scankey.  (Note: this positioning strategy is
* only used for a backward scan, so that is always the
* correct starting position.)
*/
nextkey = false;
goback = true;
break;

case BTLessEqualStrategyNumber:

/*
* Find first item > scankey, then back up one to arrive at last
* item <= scankey.  (Note: this positioning strategy is only used
* for a backward scan, so that is always the correct starting
* position.)
* Find first item > scankey, then back up one to arrive at
* last item <= scankey.  (Note: this positioning strategy is
* only used for a backward scan, so that is always the
* correct starting position.)
*/
nextkey = true;
goback = true;
break;

case BTEqualStrategyNumber:

/*
* If a backward scan was specified, need to start with last
* equal item not first one.
@@ -739,8 +748,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (ScanDirectionIsBackward(dir))
{
/*
* This is the same as the <= strategy.  We will check
* at the end whether the found item is actually =.
* This is the same as the <= strategy.  We will check at
* the end whether the found item is actually =.
*/
nextkey = true;
goback = true;
@@ -748,8 +757,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
else
{
/*
* This is the same as the >= strategy.  We will check
* at the end whether the found item is actually =.
* This is the same as the >= strategy.  We will check at
* the end whether the found item is actually =.
*/
nextkey = false;
goback = false;
@@ -757,18 +766,20 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
break;

case BTGreaterEqualStrategyNumber:

/*
* Find first item >= scankey.  (This is only used for
* forward scans.)
* Find first item >= scankey.  (This is only used for forward
* scans.)
*/
nextkey = false;
goback = false;
break;

case BTGreaterStrategyNumber:

/*
* Find first item > scankey.  (This is only used for
* forward scans.)
* Find first item > scankey.  (This is only used for forward
* scans.)
*/
nextkey = true;
goback = false;
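
Pulled together, the switch above fixes (nextkey, goback) per strategy. The summary table below is an editorial aid derived entirely from the cases shown:

    strategy        nextkey  goback   scan starts at
    --------        -------  ------   --------------
    <   (backward)  false    true     last item  <  scankey
    <=  (backward)  true     true     last item  <= scankey
    =   (backward)  true     true     last item  <= scankey (verify = afterwards)
    =   (forward)   false    false    first item >= scankey (verify = afterwards)
    >=  (forward)   false    false    first item >= scankey
    >   (forward)   true     false    first item >  scankey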
@@ -814,23 +825,23 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
pfree(scankeys);

/*
* If nextkey = false, we are positioned at the first item >= scan key,
* or possibly at the end of a page on which all the existing items are
* less than the scan key and we know that everything on later pages
* is greater than or equal to scan key.
* If nextkey = false, we are positioned at the first item >= scan
* key, or possibly at the end of a page on which all the existing
* items are less than the scan key and we know that everything on
* later pages is greater than or equal to scan key.
*
* If nextkey = true, we are positioned at the first item > scan key,
* or possibly at the end of a page on which all the existing items are
* If nextkey = true, we are positioned at the first item > scan key, or
* possibly at the end of a page on which all the existing items are
* less than or equal to the scan key and we know that everything on
* later pages is greater than scan key.
*
* The actually desired starting point is either this item or the prior
* one, or in the end-of-page case it's the first item on the next page
* or the last item on this page.  We apply _bt_step if needed to get to
* the right place.
* one, or in the end-of-page case it's the first item on the next
* page or the last item on this page.  We apply _bt_step if needed to
* get to the right place.
*
* If _bt_step fails (meaning we fell off the end of the index in
* one direction or the other), then there are no matches so we just
* If _bt_step fails (meaning we fell off the end of the index in one
* direction or the other), then there are no matches so we just
* return false.
*/
if (goback)
@@ -1292,7 +1303,8 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
itup = &(btitem->bti_itup);

/*
* Okay, we are on the first or last tuple.  Does it pass all the quals?
* Okay, we are on the first or last tuple.  Does it pass all the
* quals?
*/
if (_bt_checkkeys(scan, itup, dir, &continuescan))
{

@@ -41,11 +41,11 @@
*
* Since the index will never be used unless it is completely built,
* from a crash-recovery point of view there is no need to WAL-log the
* steps of the build.  After completing the index build, we can just sync
* steps of the build.  After completing the index build, we can just sync
* the whole file to disk using smgrimmedsync() before exiting this module.
* This can be seen to be sufficient for crash recovery by considering that
* it's effectively equivalent to what would happen if a CHECKPOINT occurred
* just after the index build.  However, it is clearly not sufficient if the
* just after the index build.  However, it is clearly not sufficient if the
* DBA is using the WAL log for PITR or replication purposes, since another
* machine would not be able to reconstruct the index from WAL.  Therefore,
* we log the completed index pages to WAL if and only if WAL archiving is
@@ -56,7 +56,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.87 2004/08/29 04:12:21 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.88 2004/08/29 05:06:40 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -98,7 +98,7 @@ struct BTSpool
typedef struct BTPageState
{
Page btps_page; /* workspace for page building */
BlockNumber btps_blkno; /* block # to write this page at */
BlockNumber btps_blkno; /* block # to write this page at */
BTItem btps_minkey; /* copy of minimum key (first item) on
* page */
OffsetNumber btps_lastoff; /* last item offset loaded */
@@ -114,10 +114,10 @@ typedef struct BTPageState
typedef struct BTWriteState
{
Relation index;
bool btws_use_wal; /* dump pages to WAL? */
BlockNumber btws_pages_alloced; /* # pages allocated */
BlockNumber btws_pages_written; /* # pages written out */
Page btws_zeropage; /* workspace for filling zeroes */
bool btws_use_wal; /* dump pages to WAL? */
BlockNumber btws_pages_alloced; /* # pages allocated */
BlockNumber btws_pages_written; /* # pages written out */
Page btws_zeropage; /* workspace for filling zeroes */
} BTWriteState;


@@ -136,7 +136,7 @@ static void _bt_sortaddtup(Page page, Size itemsize,
static void _bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti);
static void _bt_uppershutdown(BTWriteState *wstate, BTPageState *state);
static void _bt_load(BTWriteState *wstate,
BTSpool *btspool, BTSpool *btspool2);
BTSpool *btspool, BTSpool *btspool2);


/*
@@ -157,12 +157,12 @@ _bt_spoolinit(Relation index, bool isunique, bool isdead)
btspool->isunique = isunique;

/*
* We size the sort area as maintenance_work_mem rather than work_mem to
* speed index creation.  This should be OK since a single backend can't
* run multiple index creations in parallel.  Note that creation of a
* unique index actually requires two BTSpool objects.  We expect that the
* second one (for dead tuples) won't get very full, so we give it only
* work_mem.
* We size the sort area as maintenance_work_mem rather than work_mem
* to speed index creation.  This should be OK since a single backend
* can't run multiple index creations in parallel.  Note that creation
* of a unique index actually requires two BTSpool objects.  We expect
* that the second one (for dead tuples) won't get very full, so we
* give it only work_mem.
*/
btKbytes = isdead ? work_mem : maintenance_work_mem;
btspool->sortstate = tuplesort_begin_index(index, isunique,
@@ -205,7 +205,7 @@ _bt_spool(BTItem btitem, BTSpool *btspool)
void
_bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
{
BTWriteState wstate;
BTWriteState wstate;

#ifdef BTREE_BUILD_STATS
if (log_btree_build_stats)
@@ -220,6 +220,7 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
tuplesort_performsort(btspool2->sortstate);

wstate.index = btspool->index;

/*
* We need to log index creation in WAL iff WAL archiving is enabled
* AND it's not a temp index.
@@ -229,7 +230,7 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
/* reserve the metapage */
wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
wstate.btws_pages_written = 0;
wstate.btws_zeropage = NULL; /* until needed */
wstate.btws_zeropage = NULL; /* until needed */

_bt_load(&wstate, btspool, btspool2);
}
@@ -246,7 +247,7 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
static Page
_bt_blnewpage(uint32 level)
{
Page page;
Page page;
BTPageOpaque opaque;

page = (Page) palloc(BLCKSZ);
@@ -313,8 +314,8 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
* If we have to write pages nonsequentially, fill in the space with
* zeroes until we come back and overwrite.  This is not logically
* necessary on standard Unix filesystems (unwritten space will read
* as zeroes anyway), but it should help to avoid fragmentation.
* The dummy pages aren't WAL-logged though.
* as zeroes anyway), but it should help to avoid fragmentation. The
* dummy pages aren't WAL-logged though.
*/
while (blkno > wstate->btws_pages_written)
{
@@ -326,9 +327,9 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
}

/*
* Now write the page.  We say isTemp = true even if it's not a
* temp index, because there's no need for smgr to schedule an fsync
* for this write; we'll do it ourselves before ending the build.
* Now write the page.  We say isTemp = true even if it's not a temp
* index, because there's no need for smgr to schedule an fsync for
* this write; we'll do it ourselves before ending the build.
*/
smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true);
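
The zero-fill loop elided between those two hunks amounts to roughly this (an illustrative paraphrase using the BTWriteState fields the diff itself shows):

    while (blkno > wstate->btws_pages_written)
    {
        if (!wstate->btws_zeropage)     /* allocate the all-zeroes page lazily */
            wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
        smgrwrite(wstate->index->rd_smgr, wstate->btws_pages_written++,
                  (char *) wstate->btws_zeropage, true);
    }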

@@ -468,7 +469,7 @@ static void
_bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti)
{
Page npage;
BlockNumber nblkno;
BlockNumber nblkno;
OffsetNumber last_off;
Size pgspc;
Size btisz;
@@ -506,7 +507,7 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti)
* already.  Finish off the page and write it out.
*/
Page opage = npage;
BlockNumber oblkno = nblkno;
BlockNumber oblkno = nblkno;
ItemId ii;
ItemId hii;
BTItem obti;
@@ -539,8 +540,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti)
((PageHeader) opage)->pd_lower -= sizeof(ItemIdData);

/*
* Link the old page into its parent, using its minimum key.  If
* we don't have a parent, we have to create one; this adds a new
* Link the old page into its parent, using its minimum key.  If we
* don't have a parent, we have to create one; this adds a new
* btree level.
*/
if (state->btps_next == NULL)
@@ -572,8 +573,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti)
}

/*
* Write out the old page.  We never need to touch it again,
* so we can free the opage workspace too.
* Write out the old page.  We never need to touch it again, so we
* can free the opage workspace too.
*/
_bt_blwritepage(wstate, opage, oblkno);

@@ -613,7 +614,7 @@ static void
_bt_uppershutdown(BTWriteState *wstate, BTPageState *state)
{
BTPageState *s;
BlockNumber rootblkno = P_NONE;
BlockNumber rootblkno = P_NONE;
uint32 rootlevel = 0;
Page metapage;

@@ -663,9 +664,9 @@ _bt_uppershutdown(BTWriteState *wstate, BTPageState *state)

/*
* As the last step in the process, construct the metapage and make it
* point to the new root (unless we had no data at all, in which case it's
* set to point to "P_NONE").  This changes the index to the "valid"
* state by filling in a valid magic number in the metapage.
* point to the new root (unless we had no data at all, in which case
* it's set to point to "P_NONE").  This changes the index to the
* "valid" state by filling in a valid magic number in the metapage.
*/
metapage = (Page) palloc(BLCKSZ);
_bt_initmetapage(metapage, rootblkno, rootlevel);
@@ -744,7 +745,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)

compare = DatumGetInt32(FunctionCall2(&entry->sk_func,
attrDatum1,
attrDatum2));
attrDatum2));
if (compare > 0)
{
load1 = false;
@@ -768,7 +769,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
if (should_free)
pfree((void *) bti);
bti = (BTItem) tuplesort_getindextuple(btspool->sortstate,
true, &should_free);
true, &should_free);
}
else
{
@@ -776,7 +777,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
if (should_free2)
pfree((void *) bti2);
bti2 = (BTItem) tuplesort_getindextuple(btspool2->sortstate,
true, &should_free2);
true, &should_free2);
}
}
_bt_freeskey(indexScanKey);
@@ -785,7 +786,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
{
/* merge is unnecessary */
while ((bti = (BTItem) tuplesort_getindextuple(btspool->sortstate,
true, &should_free)) != NULL)
true, &should_free)) != NULL)
{
/* When we see first tuple, create first index page */
if (state == NULL)
@@ -802,18 +803,18 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)

/*
* If the index isn't temp, we must fsync it down to disk before it's
* safe to commit the transaction.  (For a temp index we don't care
* safe to commit the transaction.  (For a temp index we don't care
* since the index will be uninteresting after a crash anyway.)
*
* It's obvious that we must do this when not WAL-logging the build.
* It's less obvious that we have to do it even if we did WAL-log the
* index pages.  The reason is that since we're building outside
* shared buffers, a CHECKPOINT occurring during the build has no way
* to flush the previously written data to disk (indeed it won't know
* the index even exists).  A crash later on would replay WAL from the
* It's obvious that we must do this when not WAL-logging the build. It's
* less obvious that we have to do it even if we did WAL-log the index
* pages.  The reason is that since we're building outside shared
* buffers, a CHECKPOINT occurring during the build has no way to
* flush the previously written data to disk (indeed it won't know the
* index even exists).  A crash later on would replay WAL from the
* checkpoint, therefore it wouldn't replay our earlier WAL entries.
* If we do not fsync those pages here, they might still not be on disk
* when the crash occurs.
* If we do not fsync those pages here, they might still not be on
* disk when the crash occurs.
*/
if (!wstate->index->rd_istemp)
smgrimmedsync(wstate->index->rd_smgr);

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtutils.c,v 1.59 2004/08/29 04:12:21 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtutils.c,v 1.60 2004/08/29 05:06:40 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -48,8 +48,8 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
bool null;

/*
* We can use the cached (default) support procs since no cross-type
* comparison can be needed.
* We can use the cached (default) support procs since no
* cross-type comparison can be needed.
*/
procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
arg = index_getattr(itup, i + 1, itupdesc, &null);
@@ -68,7 +68,7 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
/*
* _bt_mkscankey_nodata
*		Build a scan key that contains comparator routines appropriate to
*		the key datatypes, but no comparison data.  The comparison data
*		the key datatypes, but no comparison data.  The comparison data
*		ultimately used must match the key datatypes.
*
* The result cannot be used with _bt_compare().  Currently this
@@ -93,8 +93,8 @@ _bt_mkscankey_nodata(Relation rel)
FmgrInfo   *procinfo;

/*
* We can use the cached (default) support procs since no cross-type
* comparison can be needed.
* We can use the cached (default) support procs since no
* cross-type comparison can be needed.
*/
procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
ScanKeyEntryInitializeWithInfo(&skey[i],
@@ -163,12 +163,12 @@ _bt_formitem(IndexTuple itup)
* _bt_preprocess_keys() -- Preprocess scan keys
*
* The caller-supplied keys (in scan->keyData[]) are copied to
* so->keyData[] with possible transformation.  scan->numberOfKeys is
* so->keyData[] with possible transformation.  scan->numberOfKeys is
* the number of input keys, so->numberOfKeys gets the number of output
* keys (possibly less, never greater).
*
* The primary purpose of this routine is to discover how many scan keys
* must be satisfied to continue the scan.  It also attempts to eliminate
* must be satisfied to continue the scan.  It also attempts to eliminate
* redundant keys and detect contradictory keys.  At present, redundant and
* contradictory keys can only be detected for same-data-type comparisons,
* but that's the usual case so it seems worth doing.
@@ -198,7 +198,7 @@ _bt_formitem(IndexTuple itup)
* or one or two boundary-condition keys for each attr.)  However, we can
* only detect redundant keys when the right-hand datatypes are all equal
* to the index datatype, because we do not know suitable operators for
* comparing right-hand values of two different datatypes.  (In theory
* comparing right-hand values of two different datatypes.  (In theory
* we could handle comparison of a RHS of the index datatype with a RHS of
* another type, but that seems too much pain for too little gain.)  So,
* keys whose operator has a nondefault subtype (ie, its RHS is not of the
@@ -285,9 +285,9 @@ _bt_preprocess_keys(IndexScanDesc scan)
*
* xform[i] points to the currently best scan key of strategy type i+1,
* if any is found with a default operator subtype; it is NULL if we
* haven't yet found such a key for this attr.  Scan keys of nondefault
* subtypes are transferred to the output with no processing except for
* noting if they are of "=" type.
* haven't yet found such a key for this attr.  Scan keys of
* nondefault subtypes are transferred to the output with no
* processing except for noting if they are of "=" type.
*/
attno = 1;
memset(xform, 0, sizeof(xform));
@@ -361,7 +361,7 @@ _bt_preprocess_keys(IndexScanDesc scan)

/*
* If no "=" for this key, we're done with required keys
*/
if (! hasOtherTypeEqual)
if (!hasOtherTypeEqual)
allEqualSoFar = false;
}

@@ -369,8 +369,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
if (xform[BTLessStrategyNumber - 1]
&& xform[BTLessEqualStrategyNumber - 1])
{
ScanKey lt = xform[BTLessStrategyNumber - 1];
ScanKey le = xform[BTLessEqualStrategyNumber - 1];
ScanKey lt = xform[BTLessStrategyNumber - 1];
ScanKey le = xform[BTLessEqualStrategyNumber - 1];

test = FunctionCall2(&le->sk_func,
lt->sk_argument,
@@ -385,8 +385,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
if (xform[BTGreaterStrategyNumber - 1]
&& xform[BTGreaterEqualStrategyNumber - 1])
{
ScanKey gt = xform[BTGreaterStrategyNumber - 1];
ScanKey ge = xform[BTGreaterEqualStrategyNumber - 1];
ScanKey gt = xform[BTGreaterStrategyNumber - 1];
ScanKey ge = xform[BTGreaterEqualStrategyNumber - 1];

test = FunctionCall2(&ge->sk_func,
gt->sk_argument,
@@ -545,21 +545,23 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple,
{
/*
* Tuple fails this qual.  If it's a required qual, then we
* may be able to conclude no further tuples will pass, either.
* We have to look at the scan direction and the qual type.
* may be able to conclude no further tuples will pass,
* either.  We have to look at the scan direction and the qual
* type.
*
* Note: the only case in which we would keep going after failing
* a required qual is if there are partially-redundant quals that
* _bt_preprocess_keys() was unable to eliminate.  For example,
* given "x > 4 AND x > 10" where both are cross-type comparisons
* and so not removable, we might start the scan at the x = 4
* boundary point.  The "x > 10" condition will fail until we
* pass x = 10, but we must not stop the scan on its account.
* a required qual is if there are partially-redundant quals
* that _bt_preprocess_keys() was unable to eliminate.  For
* example, given "x > 4 AND x > 10" where both are cross-type
* comparisons and so not removable, we might start the scan
* at the x = 4 boundary point.  The "x > 10" condition will
* fail until we pass x = 10, but we must not stop the scan on
* its account.
*
* Note: because we stop the scan as soon as any required equality
* qual fails, it is critical that equality quals be used for the
* initial positioning in _bt_first() when they are available.
* See comments in _bt_first().
* Note: because we stop the scan as soon as any required
* equality qual fails, it is critical that equality quals be
* used for the initial positioning in _bt_first() when they
* are available.  See comments in _bt_first().
*/
if (ikey < so->numberOfRequiredKeys)
{
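
A worked trace of the comment's own "x > 4 AND x > 10" example, added here for illustration. A forward scan positioned at the x = 4 boundary sees:

    x on the scan path:   5    7    10   11   12
    "x > 4"  (required):  t    t    t    t    t
    "x > 10" (required):  f    f    f    t    t

The "x > 10" failures at 5, 7, and 10 must not end the scan, since the rows at 11 and 12 still qualify. Only a failed required equality qual permits stopping immediately, which is why _bt_first() prefers equality quals for the initial positioning.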

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.17 2004/08/29 04:12:21 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.18 2004/08/29 05:06:40 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -770,7 +770,7 @@ static void
out_target(char *buf, xl_btreetid *target)
{
sprintf(buf + strlen(buf), "rel %u/%u/%u; tid %u/%u",
target->node.spcNode, target->node.dbNode, target->node.relNode,
target->node.spcNode, target->node.dbNode, target->node.relNode,
ItemPointerGetBlockNumber(&(target->tid)),
ItemPointerGetOffsetNumber(&(target->tid)));
}

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/rtree/rtscan.c,v 1.54 2004/08/29 04:12:22 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/rtree/rtscan.c,v 1.55 2004/08/29 05:06:40 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -123,7 +123,7 @@ rtrescan(PG_FUNCTION_ARGS)
Oid int_oper;
RegProcedure int_proc;

opclass = s->indexRelation->rd_index->indclass[attno-1];
opclass = s->indexRelation->rd_index->indclass[attno - 1];
int_strategy = RTMapToInternalOperator(s->keyData[i].sk_strategy);
int_oper = get_opclass_member(opclass,
s->keyData[i].sk_subtype,
@@ -280,14 +280,14 @@ rtdropscan(IndexScanDesc s)
void
ReleaseResources_rtree(void)
{
RTScanList l;
RTScanList prev;
RTScanList next;
RTScanList l;
RTScanList prev;
RTScanList next;

/*
* Note: this should be a no-op during normal query shutdown.
* However, in an abort situation ExecutorEnd is not called and so
* there may be open index scans to clean up.
* Note: this should be a no-op during normal query shutdown. However,
* in an abort situation ExecutorEnd is not called and so there may be
* open index scans to clean up.
*/
prev = NULL;


@@ -24,7 +24,7 @@
* Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.24 2004/08/29 04:12:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.25 2004/08/29 05:06:40 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -62,6 +62,7 @@
* Link to shared-memory data structures for CLOG control
*/
static SlruCtlData ClogCtlData;

#define ClogCtl (&ClogCtlData)



@@ -48,7 +48,7 @@
* Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.20 2004/08/29 04:12:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.21 2004/08/29 05:06:40 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -79,7 +79,7 @@
* segment and page numbers in SimpleLruTruncate (see PagePrecedes()).
*
* Note: this file currently assumes that segment file names will be four
* hex digits.  This sets a lower bound on the segment size (64K transactions
* hex digits.  This sets a lower bound on the segment size (64K transactions
* for 32-bit TransactionIds).
*/
#define SLRU_PAGES_PER_SEGMENT	32
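
The arithmetic behind that lower bound, sketched for illustration (ctl->Dir, MAXPGPATH, and SLRU_PAGES_PER_SEGMENT are real symbols of this era; xacts_per_page is a hypothetical stand-in):

    /* Four hex digits allow at most 2^16 segment files, so 2^32 possible
     * 32-bit xids / 2^16 segments = 64K transactions per segment, minimum. */
    int pageno = xid / xacts_per_page;              /* which SLRU page holds xid */
    int segno = pageno / SLRU_PAGES_PER_SEGMENT;    /* which segment file */
    snprintf(path, MAXPGPATH, "%s/%04X", ctl->Dir, segno);  /* four hex digits */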
@@ -96,9 +96,9 @@
*/
typedef struct SlruFlushData
{
int num_files; /* # files actually open */
int fd[NUM_SLRU_BUFFERS]; /* their FD's */
int segno[NUM_SLRU_BUFFERS]; /* their log seg#s */
int num_files; /* # files actually open */
int fd[NUM_SLRU_BUFFERS]; /* their FD's */
int segno[NUM_SLRU_BUFFERS]; /* their log seg#s */
} SlruFlushData;

/*
@@ -132,7 +132,7 @@ static int	slru_errno;

static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno);
static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno,
SlruFlush fdata);
SlruFlush fdata);
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid);
static int	SlruSelectLRUPage(SlruCtl ctl, int pageno);

@@ -385,7 +385,7 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
/* If we failed, and we're in a flush, better close the files */
if (!ok && fdata)
{
int i;
int i;

for (i = 0; i < fdata->num_files; i++)
close(fdata->fd[i]);
@@ -511,7 +511,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
*/
if (fdata)
{
int i;
int i;

for (i = 0; i < fdata->num_files; i++)
{
@@ -527,16 +527,17 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
{
/*
* If the file doesn't already exist, we should create it.  It is
* possible for this to need to happen when writing a page that's not
* first in its segment; we assume the OS can cope with that.
* (Note: it might seem that it'd be okay to create files only when
* SimpleLruZeroPage is called for the first page of a segment.
* However, if after a crash and restart the REDO logic elects to
* replay the log from a checkpoint before the latest one, then it's
* possible that we will get commands to set transaction status of
* transactions that have already been truncated from the commit log.
* Easiest way to deal with that is to accept references to
* nonexistent files here and in SlruPhysicalReadPage.)
* possible for this to need to happen when writing a page that's
* not first in its segment; we assume the OS can cope with that.
* (Note: it might seem that it'd be okay to create files only
* when SimpleLruZeroPage is called for the first page of a
* segment.  However, if after a crash and restart the REDO logic
* elects to replay the log from a checkpoint before the latest
* one, then it's possible that we will get commands to set
* transaction status of transactions that have already been
* truncated from the commit log. Easiest way to deal with that is
* to accept references to nonexistent files here and in
* SlruPhysicalReadPage.)
*/
SlruFileName(ctl, path, segno);
fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
@@ -648,36 +649,36 @@ SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not access status of transaction %u", xid),
errdetail("could not seek in file \"%s\" to offset %u: %m",
path, offset)));
errdetail("could not seek in file \"%s\" to offset %u: %m",
path, offset)));
break;
case SLRU_READ_FAILED:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not access status of transaction %u", xid),
errdetail("could not read from file \"%s\" at offset %u: %m",
path, offset)));
errdetail("could not read from file \"%s\" at offset %u: %m",
path, offset)));
break;
case SLRU_WRITE_FAILED:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not access status of transaction %u", xid),
errdetail("could not write to file \"%s\" at offset %u: %m",
path, offset)));
errdetail("could not write to file \"%s\" at offset %u: %m",
path, offset)));
break;
case SLRU_FSYNC_FAILED:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not access status of transaction %u", xid),
errdetail("could not fsync file \"%s\": %m",
path)));
errdetail("could not fsync file \"%s\": %m",
path)));
break;
case SLRU_CLOSE_FAILED:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not access status of transaction %u", xid),
errdetail("could not close file \"%s\": %m",
path)));
errdetail("could not close file \"%s\": %m",
path)));
break;
default:
/* can't get here, we trust */
@@ -841,8 +842,8 @@ SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
/*
* Scan shared memory and remove any pages preceding the cutoff page,
* to ensure we won't rewrite them later.  (Since this is normally
* called in or just after a checkpoint, any dirty pages should
* have been flushed already ... we're just being extra careful here.)
* called in or just after a checkpoint, any dirty pages should have
* been flushed already ... we're just being extra careful here.)
*/
LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);

@@ -952,8 +953,11 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions)
errno = 0;
}
#ifdef WIN32
/* This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but
not in released version */

/*
* This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but
* not in released version
*/
if (GetLastError() == ERROR_NO_MORE_FILES)
errno = 0;
#endif

@@ -5,7 +5,7 @@
*
* The pg_subtrans manager is a pg_clog-like manager that stores the parent
* transaction Id for each transaction.  It is a fundamental part of the
* nested transactions implementation.  A main transaction has a parent
* nested transactions implementation.  A main transaction has a parent
* of InvalidTransactionId, and each subtransaction has its immediate parent.
* The tree can easily be walked from child to parent, but not in the
* opposite direction.
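
Walking that tree from a child up to its topmost ancestor is a simple loop, along the lines of the sketch below (a paraphrase of SubTransGetTopmostTransaction, which later hunks in this commit touch):

    TransactionId parentXid = xid,
                  previousXid = xid;

    while (TransactionIdIsValid(parentXid))
    {
        previousXid = parentXid;
        parentXid = SubTransGetParent(parentXid);   /* one hop up the tree */
    }
    return previousXid;     /* the main (topmost) transaction */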
@@ -22,7 +22,7 @@
* Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.4 2004/08/29 04:12:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.5 2004/08/29 05:06:40 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -57,6 +57,7 @@
* Link to shared-memory data structures for SUBTRANS control
*/
static SlruCtlData SubTransCtlData;

#define SubTransCtl  (&SubTransCtlData)


@@ -101,7 +102,7 @@ SubTransGetParent(TransactionId xid)
int entryno = TransactionIdToEntry(xid);
int slotno;
TransactionId *ptr;
TransactionId parent;
TransactionId parent;

/* Can't ask about stuff that might not be around anymore */
Assert(TransactionIdFollowsOrEquals(xid, RecentXmin));
@@ -139,7 +140,7 @@ TransactionId
SubTransGetTopmostTransaction(TransactionId xid)
{
TransactionId parentXid = xid,
previousXid = xid;
previousXid = xid;

/* Can't ask about stuff that might not be around anymore */
Assert(TransactionIdFollowsOrEquals(xid, RecentXmin));
@@ -185,7 +186,7 @@ SUBTRANSShmemInit(void)
* must have been called already.)
*
* Note: it's not really necessary to create the initial segment now,
* since slru.c would create it on first write anyway.  But we may as well
* since slru.c would create it on first write anyway.  But we may as well
* do it to be sure the directory is set up correctly.
*/
void
@@ -229,10 +230,11 @@ StartupSUBTRANS(void)
int startPage;

/*
* Since we don't expect pg_subtrans to be valid across crashes,
* we initialize the currently-active page to zeroes during startup.
* Since we don't expect pg_subtrans to be valid across crashes, we
* initialize the currently-active page to zeroes during startup.
* Whenever we advance into a new page, ExtendSUBTRANS will likewise
* zero the new page without regard to whatever was previously on disk.
* zero the new page without regard to whatever was previously on
* disk.
*/
LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

@@ -251,8 +253,8 @@ ShutdownSUBTRANS(void)
/*
* Flush dirty SUBTRANS pages to disk
*
* This is not actually necessary from a correctness point of view.
* We do it merely as a debugging aid.
* This is not actually necessary from a correctness point of view. We do
* it merely as a debugging aid.
*/
SimpleLruFlush(SubTransCtl, false);
}
@@ -266,8 +268,8 @@ CheckPointSUBTRANS(void)
/*
* Flush dirty SUBTRANS pages to disk
*
* This is not actually necessary from a correctness point of view.
* We do it merely to improve the odds that writing of dirty pages is done
* This is not actually necessary from a correctness point of view. We do
* it merely to improve the odds that writing of dirty pages is done
* by the checkpoint process and not by backends.
*/
SimpleLruFlush(SubTransCtl, true);

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/transam.c,v 1.60 2004/08/29 04:12:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/transam.c,v 1.61 2004/08/29 05:06:40 momjian Exp $
*
* NOTES
*	  This file contains the high level access-method interface to the
@@ -126,7 +126,7 @@ TransactionLogUpdate(TransactionId transactionId,	/* trans id to update */
static void
TransactionLogMultiUpdate(int nxids, TransactionId *xids, XidStatus status)
{
int i;
int i;

Assert(nxids != 0);

@@ -199,9 +199,10 @@ TransactionIdDidCommit(TransactionId transactionId)
return true;

/*
* If it's marked subcommitted, we have to check the parent recursively.
* However, if it's older than RecentXmin, we can't look at pg_subtrans;
* instead assume that the parent crashed without cleaning up its children.
* If it's marked subcommitted, we have to check the parent
* recursively. However, if it's older than RecentXmin, we can't look
* at pg_subtrans; instead assume that the parent crashed without
* cleaning up its children.
*/
if (xidstatus == TRANSACTION_STATUS_SUB_COMMITTED)
{
@@ -214,7 +215,7 @@ TransactionIdDidCommit(TransactionId transactionId)
return TransactionIdDidCommit(parentXid);
}

/*
/*
* It's not committed.
*/
return false;
@@ -247,9 +248,10 @@ TransactionIdDidAbort(TransactionId transactionId)
return true;

/*
* If it's marked subcommitted, we have to check the parent recursively.
* However, if it's older than RecentXmin, we can't look at pg_subtrans;
* instead assume that the parent crashed without cleaning up its children.
* If it's marked subcommitted, we have to check the parent
* recursively. However, if it's older than RecentXmin, we can't look
* at pg_subtrans; instead assume that the parent crashed without
* cleaning up its children.
*/
if (xidstatus == TRANSACTION_STATUS_SUB_COMMITTED)
{

@@ -6,7 +6,7 @@
* Copyright (c) 2000-2004, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.58 2004/08/29 04:12:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.59 2004/08/29 05:06:40 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -47,9 +47,9 @@ GetNewTransactionId(bool isSubXact)
xid = ShmemVariableCache->nextXid;

/*
* If we are allocating the first XID of a new page of the commit
* log, zero out that commit-log page before returning. We must do
* this while holding XidGenLock, else another xact could acquire and
* If we are allocating the first XID of a new page of the commit log,
* zero out that commit-log page before returning.  We must do this
* while holding XidGenLock, else another xact could acquire and
* commit a later XID before we zero the page.  Fortunately, a page of
* the commit log holds 32K or more transactions, so we don't have to
* do this very often.
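
The "32K or more" figure follows from the CLOG layout: two status bits per transaction, hence four per byte. A back-of-the-envelope sketch (illustrative; in the actual code ExtendCLOG performs roughly this check internally):

    /* 2 status bits per xact => 4 xacts per byte => BLCKSZ * 4 per page */
    #define CLOG_XACTS_PER_PAGE (BLCKSZ * 4)    /* 8192 * 4 = 32768 with default BLCKSZ */

    if (xid % CLOG_XACTS_PER_PAGE == 0)
        ExtendCLOG(xid);    /* first XID on a fresh page: zero it under XidGenLock */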
@@ -61,17 +61,18 @@ GetNewTransactionId(bool isSubXact)

/*
* Now advance the nextXid counter.  This must not happen until after
* we have successfully completed ExtendCLOG() --- if that routine fails,
* we want the next incoming transaction to try it again.  We cannot
* assign more XIDs until there is CLOG space for them.
* we have successfully completed ExtendCLOG() --- if that routine
* fails, we want the next incoming transaction to try it again.  We
* cannot assign more XIDs until there is CLOG space for them.
*/
TransactionIdAdvance(ShmemVariableCache->nextXid);

/*
* We must store the new XID into the shared PGPROC array before releasing
* XidGenLock.  This ensures that when GetSnapshotData calls
* We must store the new XID into the shared PGPROC array before
* releasing XidGenLock.  This ensures that when GetSnapshotData calls
* ReadNewTransactionId, all active XIDs before the returned value of
* nextXid are already present in PGPROC.  Else we have a race condition.
* nextXid are already present in PGPROC.  Else we have a race
* condition.
*
* XXX by storing xid into MyProc without acquiring SInvalLock, we are
* relying on fetch/store of an xid to be atomic, else other backends
@@ -86,19 +87,19 @@ GetNewTransactionId(bool isSubXact)
*
* A solution to the atomic-store problem would be to give each PGPROC
* its own spinlock used only for fetching/storing that PGPROC's xid
* and related fields.  (SInvalLock would then mean primarily that
* and related fields.  (SInvalLock would then mean primarily that
* PGPROCs couldn't be added/removed while holding the lock.)
*
* If there's no room to fit a subtransaction XID into PGPROC, set the
* cache-overflowed flag instead.  This forces readers to look in
* pg_subtrans to map subtransaction XIDs up to top-level XIDs.
* There is a race-condition window, in that the new XID will not
* appear as running until its parent link has been placed into
* pg_subtrans.  However, that will happen before anyone could possibly
* have a reason to inquire about the status of the XID, so it seems
* OK.  (Snapshots taken during this window *will* include the parent
* XID, so they will deliver the correct answer later on when someone
* does have a reason to inquire.)
* pg_subtrans to map subtransaction XIDs up to top-level XIDs.  There
* is a race-condition window, in that the new XID will not appear as
* running until its parent link has been placed into pg_subtrans.
* However, that will happen before anyone could possibly have a
* reason to inquire about the status of the XID, so it seems OK.
* (Snapshots taken during this window *will* include the parent XID,
* so they will deliver the correct answer later on when someone does
* have a reason to inquire.)
*/
if (MyProc != NULL)
{
@@ -112,9 +113,7 @@ GetNewTransactionId(bool isSubXact)
MyProc->subxids.nxids++;
}
else
{
MyProc->subxids.overflowed = true;
}
}
}

File diff suppressed because it is too large

File diff suppressed because it is too large
@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.33 2004/08/29 04:12:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.34 2004/08/29 05:06:41 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -212,11 +212,11 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode)
res->reldata.rd_node = rnode;

/*
* We set up the lockRelId in case anything tries to lock the dummy
* relation.  Note that this is fairly bogus since relNode may be
* different from the relation's OID.  It shouldn't really matter
* though, since we are presumably running by ourselves and can't
* have any lock conflicts ...
* We set up the lockRelId in case anything tries to lock the
* dummy relation.  Note that this is fairly bogus since relNode
* may be different from the relation's OID.  It shouldn't really
* matter though, since we are presumably running by ourselves and
* can't have any lock conflicts ...
*/
res->reldata.rd_lockInfo.lockRelId.dbId = rnode.dbNode;
res->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode;
@@ -234,14 +234,15 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode)

res->reldata.rd_targblock = InvalidBlockNumber;
res->reldata.rd_smgr = smgropen(res->reldata.rd_node);

/*
* Create the target file if it doesn't already exist.  This lets
* us cope if the replay sequence contains writes to a relation
* that is later deleted.  (The original coding of this routine
* would instead return NULL, causing the writes to be suppressed.
* But that seems like it risks losing valuable data if the filesystem
* loses an inode during a crash.  Better to write the data until we
* are actually told to delete the file.)
* But that seems like it risks losing valuable data if the
* filesystem loses an inode during a crash.  Better to write the
* data until we are actually told to delete the file.)
*/
smgrcreate(res->reldata.rd_smgr, res->reldata.rd_istemp, true);
}