diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 97a624f453e..646e37d79fc 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1,4 +1,4 @@ - + @@ -451,6 +451,13 @@ Can an index of this type be clustered on? + + amkeytype + oid + pg_type.oid + Type of data stored in index, or zero if not a fixed type + + aminsert regproc @@ -6424,7 +6431,7 @@ sourceline text - Line number within the sourcefile the current value was set + Line number within the sourcefile the current value was set from (NULL for values set in sources other than configuration files) diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 41607c54dc3..af4c4c058fd 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.104 2008/06/19 00:46:03 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.105 2008/09/15 18:43:41 tgl Exp $ * * NOTES * This file contains only the public interface routines. @@ -79,12 +79,12 @@ hashbuild(PG_FUNCTION_ARGS) * then we'll thrash horribly. To prevent that scenario, we can sort the * tuples by (expected) bucket number. However, such a sort is useless * overhead when the index does fit in RAM. We choose to sort if the - * initial index size exceeds effective_cache_size. + * initial index size exceeds NBuffers. * * NOTE: this test will need adjustment if a bucket is ever different * from one page. */ - if (num_buckets >= (uint32) effective_cache_size) + if (num_buckets >= (uint32) NBuffers) buildstate.spool = _h_spoolinit(index, num_buckets); else buildstate.spool = NULL; @@ -129,7 +129,7 @@ hashbuildCallback(Relation index, IndexTuple itup; /* form an index tuple and point it at the heap tuple */ - itup = index_form_tuple(RelationGetDescr(index), values, isnull); + itup = _hash_form_tuple(index, values, isnull); itup->t_tid = htup->t_self; /* Hash indexes don't index nulls, see notes in hashinsert */ @@ -153,8 +153,8 @@ hashbuildCallback(Relation index, /* * hashinsert() -- insert an index tuple into a hash table. * - * Hash on the index tuple's key, find the appropriate location - * for the new tuple, and put it there. + * Hash on the heap tuple's key, form an index tuple with hash code. + * Find the appropriate location for the new tuple, and put it there. */ Datum hashinsert(PG_FUNCTION_ARGS) @@ -171,7 +171,7 @@ hashinsert(PG_FUNCTION_ARGS) IndexTuple itup; /* generate an index tuple */ - itup = index_form_tuple(RelationGetDescr(rel), values, isnull); + itup = _hash_form_tuple(rel, values, isnull); itup->t_tid = *ht_ctid; /* @@ -211,8 +211,8 @@ hashgettuple(PG_FUNCTION_ARGS) OffsetNumber offnum; bool res; - /* Hash indexes are never lossy (at the moment anyway) */ - scan->xs_recheck = false; + /* Hash indexes are always lossy since we store only the hash code */ + scan->xs_recheck = true; /* * We hold pin but not lock on current buffer while outside the hash AM. @@ -317,7 +317,8 @@ hashgetbitmap(PG_FUNCTION_ARGS) /* Save tuple ID, and continue scanning */ if (add_tuple) { - tbm_add_tuples(tbm, &scan->xs_ctup.t_self, 1, false); + /* Note we mark the tuple ID as requiring recheck */ + tbm_add_tuples(tbm, &scan->xs_ctup.t_self, 1, true); ntids++; } @@ -527,7 +528,7 @@ hashbulkdelete(PG_FUNCTION_ARGS) * each bucket. 
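The hashgettuple and hashgetbitmap hunks above flip hash scans to lossy (xs_recheck = true, and the bitmap tuples are flagged for recheck) because the index now stores only the 32-bit hash code, and distinct keys can share a code. The standalone sketch below is not PostgreSQL code; toy_hash is a deliberately weak stand-in for the real hash function, chosen so a collision is easy to show, and it illustrates why the executor has to recheck the original qual against the heap tuple.

/*
 * Illustrative sketch (not PostgreSQL code): when an index entry carries
 * only a hash code, two distinct keys can map to the same entry, so a
 * match on the code alone is merely a candidate.  The executor must
 * recheck the original condition against the heap tuple, which is what
 * setting xs_recheck = true requests.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* deliberately weak hash so a collision is easy to demonstrate */
static uint32_t
toy_hash(const char *s)
{
    uint32_t    h = 0;

    while (*s)
        h += (unsigned char) *s++;
    return h;
}

int
main(void)
{
    const char *indexed[] = {"abc", "cba"};     /* same toy hash value */
    const char *probe = "abc";
    uint32_t    probe_hash = toy_hash(probe);

    for (int i = 0; i < 2; i++)
    {
        if (toy_hash(indexed[i]) == probe_hash)
        {
            /* hash matched: candidate only, recheck the real key */
            int         ok = (strcmp(indexed[i], probe) == 0);

            printf("entry \"%s\": hash matches, recheck %s\n",
                   indexed[i], ok ? "accepts it" : "rejects it");
        }
    }
    return 0;
}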
*/ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); - metap = (HashMetaPage) BufferGetPage(metabuf); + metap = HashPageGetMeta(BufferGetPage(metabuf)); orig_maxbucket = metap->hashm_maxbucket; orig_ntuples = metap->hashm_ntuples; memcpy(&local_metapage, metap, sizeof(local_metapage)); @@ -629,7 +630,7 @@ loop_top: /* Write-lock metapage and check for split since we started */ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE, LH_META_PAGE); - metap = (HashMetaPage) BufferGetPage(metabuf); + metap = HashPageGetMeta(BufferGetPage(metabuf)); if (cur_maxbucket != metap->hashm_maxbucket) { diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c index 7f68318f1a6..6195c8a2ac2 100644 --- a/src/backend/access/hash/hashinsert.c +++ b/src/backend/access/hash/hashinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.50 2008/06/19 00:46:03 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.51 2008/09/15 18:43:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -43,18 +43,11 @@ _hash_doinsert(Relation rel, IndexTuple itup) bool do_expand; uint32 hashkey; Bucket bucket; - Datum datum; - bool isnull; /* - * Compute the hash key for the item. We do this first so as not to need - * to hold any locks while running the hash function. + * Get the hash key for the item (it's stored in the index tuple itself). */ - if (rel->rd_rel->relnatts != 1) - elog(ERROR, "hash indexes support only one index key"); - datum = index_getattr(itup, 1, RelationGetDescr(rel), &isnull); - Assert(!isnull); - hashkey = _hash_datum2hashkey(rel, datum); + hashkey = _hash_get_indextuple_hashkey(itup); /* compute item size too */ itemsz = IndexTupleDSize(*itup); @@ -69,12 +62,14 @@ _hash_doinsert(Relation rel, IndexTuple itup) /* Read the metapage */ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); - metap = (HashMetaPage) BufferGetPage(metabuf); + metap = HashPageGetMeta(BufferGetPage(metabuf)); /* * Check whether the item can fit on a hash page at all. (Eventually, we * ought to try to apply TOAST methods if not.) Note that at this point, * itemsz doesn't include the ItemId. + * + * XXX this is useless code if we are only storing hash keys. */ if (itemsz > HashMaxItemSize((Page) metap)) ereport(ERROR, @@ -197,11 +192,15 @@ _hash_pgaddtup(Relation rel, { OffsetNumber itup_off; Page page; + uint32 hashkey; _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); page = BufferGetPage(buf); - itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); + /* Find where to insert the tuple (preserving page's hashkey ordering) */ + hashkey = _hash_get_indextuple_hashkey(itup); + itup_off = _hash_binsearch(page, hashkey); + if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add index item to \"%s\"", diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c index 06958ec8657..37315dbf378 100644 --- a/src/backend/access/hash/hashovfl.c +++ b/src/backend/access/hash/hashovfl.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.64 2008/06/19 00:46:03 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.65 2008/09/15 18:43:41 tgl Exp $ * * NOTES * Overflow pages look like ordinary relation pages. 
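This hunk and several others swap the old cast (HashMetaPage) BufferGetPage(buf) for HashPageGetMeta(BufferGetPage(buf)); with the hashm_phdr pad removed from HashMetaPageData (see the hash.h hunk further down), the metapage data now lives in the page's content area rather than in a struct overlaying the page header. Below is a simplified standalone model of that layout; ToyPageHeader, TOY_MAXALIGN and the field choices are illustrative stand-ins, not the real PageHeaderData, MAXALIGN or HashMetaPageData.

/*
 * Illustrative sketch (not PostgreSQL code): keep per-page metadata in the
 * page's content area, just after the page header, instead of declaring a
 * struct that overlays the header.  This mirrors the idea behind defining
 * HashPageGetMeta(page) as a cast of PageGetContents(page).
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define TOY_BLCKSZ      8192
#define TOY_MAXALIGN(x) (((x) + 7) & ~(size_t) 7)

typedef struct ToyPageHeader    /* stand-in for PageHeaderData */
{
    uint16_t    pd_lower;
    uint16_t    pd_upper;
} ToyPageHeader;

typedef struct ToyHashMeta      /* stand-in for HashMetaPageData */
{
    uint32_t    magic;
    uint32_t    version;
    double      ntuples;
} ToyHashMeta;

/* content area starts right after the (aligned) page header */
#define ToyPageGetContents(page) \
    ((char *) (page) + TOY_MAXALIGN(sizeof(ToyPageHeader)))

int
main(void)
{
    static char page[TOY_BLCKSZ];
    ToyHashMeta *meta;

    memset(page, 0, sizeof(page));

    meta = (ToyHashMeta *) ToyPageGetContents(page);
    meta->magic = 0x6440640;
    meta->version = 2;
    meta->ntuples = 0;

    printf("meta starts at byte offset %u, magic 0x%x, version %u\n",
           (unsigned) ((char *) meta - page), (unsigned) meta->magic,
           (unsigned) meta->version);
    return 0;
}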
@@ -187,7 +187,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf) _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE); _hash_checkpage(rel, metabuf, LH_META_PAGE); - metap = (HashMetaPage) BufferGetPage(metabuf); + metap = HashPageGetMeta(BufferGetPage(metabuf)); /* start search at hashm_firstfree */ orig_firstfree = metap->hashm_firstfree; @@ -450,7 +450,7 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, /* Read the metapage so we can determine which bitmap page to use */ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); - metap = (HashMetaPage) BufferGetPage(metabuf); + metap = HashPageGetMeta(BufferGetPage(metabuf)); /* Identify which bit to set */ ovflbitno = blkno_to_bitno(metap, ovflblkno); diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 43ec69cab32..c5edf6dcfb9 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.76 2008/08/11 11:05:10 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.77 2008/09/15 18:43:41 tgl Exp $ * * NOTES * Postgres hash pages look like ordinary relation pages. The opaque @@ -348,11 +348,9 @@ _hash_metapinit(Relation rel, double num_tuples) * Determine the target fill factor (in tuples per bucket) for this index. * The idea is to make the fill factor correspond to pages about as full * as the user-settable fillfactor parameter says. We can compute it - * exactly if the index datatype is fixed-width, but for var-width there's - * some guessing involved. + * exactly since the index datatype (i.e. uint32 hash key) is fixed-width. */ - data_width = get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid, - RelationGetDescr(rel)->attrs[0]->atttypmod); + data_width = sizeof(uint32); item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) + sizeof(ItemIdData); /* include the line pointer */ ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width; @@ -395,20 +393,18 @@ _hash_metapinit(Relation rel, double num_tuples) pageopaque->hasho_flag = LH_META_PAGE; pageopaque->hasho_page_id = HASHO_PAGE_ID; - metap = (HashMetaPage) pg; + metap = HashPageGetMeta(pg); metap->hashm_magic = HASH_MAGIC; metap->hashm_version = HASH_VERSION; metap->hashm_ntuples = 0; metap->hashm_nmaps = 0; metap->hashm_ffactor = ffactor; - metap->hashm_bsize = BufferGetPageSize(metabuf); + metap->hashm_bsize = HashGetMaxBitmapSize(pg); /* find largest bitmap array size that will fit in page size */ for (i = _hash_log2(metap->hashm_bsize); i > 0; --i) { - if ((1 << i) <= (metap->hashm_bsize - - (MAXALIGN(sizeof(PageHeaderData)) + - MAXALIGN(sizeof(HashPageOpaqueData))))) + if ((1 << i) <= metap->hashm_bsize) break; } Assert(i > 0); @@ -532,7 +528,7 @@ _hash_expandtable(Relation rel, Buffer metabuf) _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE); _hash_checkpage(rel, metabuf, LH_META_PAGE); - metap = (HashMetaPage) BufferGetPage(metabuf); + metap = HashPageGetMeta(BufferGetPage(metabuf)); /* * Check to see if split is still needed; someone else might have already @@ -774,8 +770,6 @@ _hash_splitbucket(Relation rel, Buffer nbuf; BlockNumber oblkno; BlockNumber nblkno; - bool null; - Datum datum; HashPageOpaque oopaque; HashPageOpaque nopaque; IndexTuple itup; @@ -785,7 +779,6 @@ _hash_splitbucket(Relation rel, OffsetNumber omaxoffnum; Page opage; Page npage; - TupleDesc itupdesc = RelationGetDescr(rel); /* * It should be okay 
to simultaneously write-lock pages from each bucket, @@ -846,16 +839,11 @@ _hash_splitbucket(Relation rel, } /* - * Re-hash the tuple to determine which bucket it now belongs in. - * - * It is annoying to call the hash function while holding locks, but - * releasing and relocking the page for each tuple is unappealing too. + * Fetch the item's hash key (conveniently stored in the item) + * and determine which bucket it now belongs in. */ itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum)); - datum = index_getattr(itup, 1, itupdesc, &null); - Assert(!null); - - bucket = _hash_hashkey2bucket(_hash_datum2hashkey(rel, datum), + bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup), maxbucket, highmask, lowmask); if (bucket == nbucket) diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index 1e05558523f..85368393423 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.53 2008/06/19 00:46:03 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.54 2008/09/15 18:43:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -178,6 +178,8 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument, cur->sk_subtype); + so->hashso_sk_hash = hashkey; + /* * Acquire shared split lock so we can compute the target bucket safely * (see README). @@ -186,7 +188,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) /* Read the metapage */ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); - metap = (HashMetaPage) BufferGetPage(metabuf); + metap = HashPageGetMeta(BufferGetPage(metabuf)); /* * Compute the target bucket number, and convert to block number. @@ -284,7 +286,7 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) offnum = InvalidOffsetNumber; /* - * 'offnum' now points to the last tuple we have seen (if any). + * 'offnum' now points to the last tuple we examined (if any). * * continue to step through tuples until: 1) we get to the end of the * bucket chain or 2) we find a valid tuple. @@ -297,25 +299,39 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) if (offnum != InvalidOffsetNumber) offnum = OffsetNumberNext(offnum); /* move forward */ else - offnum = FirstOffsetNumber; /* new page */ + { + /* new page, locate starting position by binary search */ + offnum = _hash_binsearch(page, so->hashso_sk_hash); + } - while (offnum > maxoff) + for (;;) { /* - * either this page is empty (maxoff == - * InvalidOffsetNumber) or we ran off the end. 
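In the _hash_splitbucket hunk above, each tuple's destination bucket is now derived directly from the hash code stored in the tuple, with no per-tuple call to the datatype's hash function. The standalone sketch below is modeled on the mask arithmetic of the existing _hash_hashkey2bucket (which is not shown in this patch); the mask and sample values are arbitrary and only illustrate how a stored code is turned into a bucket number during a split.

/*
 * Illustrative sketch (not PostgreSQL code): map a stored 32-bit hash code
 * to a bucket number in the style of _hash_hashkey2bucket.  Mask with the
 * larger power-of-two-minus-one; if that names a bucket that does not
 * exist yet, mask with the smaller one instead.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t
toy_hashkey2bucket(uint32_t hashkey, uint32_t maxbucket,
                   uint32_t highmask, uint32_t lowmask)
{
    uint32_t    bucket = hashkey & highmask;

    if (bucket > maxbucket)
        bucket = bucket & lowmask;
    return bucket;
}

int
main(void)
{
    /* say the table currently has buckets 0..5: highmask = 7, lowmask = 3 */
    uint32_t    maxbucket = 5, highmask = 7, lowmask = 3;
    uint32_t    samples[] = {0x0u, 0x6u, 0x7u, 0x1du, 0xdeadbeefu};

    for (int i = 0; i < 5; i++)
        printf("hash 0x%08x -> bucket %u\n",
               (unsigned) samples[i],
               (unsigned) toy_hashkey2bucket(samples[i], maxbucket,
                                             highmask, lowmask));
    return 0;
}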
+ * check if we're still in the range of items with + * the target hash key + */ + if (offnum <= maxoff) + { + Assert(offnum >= FirstOffsetNumber); + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); + if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup)) + break; /* yes, so exit for-loop */ + } + + /* + * ran off the end of this page, try the next */ _hash_readnext(rel, &buf, &page, &opaque); if (BufferIsValid(buf)) { maxoff = PageGetMaxOffsetNumber(page); - offnum = FirstOffsetNumber; + offnum = _hash_binsearch(page, so->hashso_sk_hash); } else { /* end of bucket */ - maxoff = offnum = InvalidOffsetNumber; - break; /* exit while */ + itup = NULL; + break; /* exit for-loop */ } } break; @@ -324,22 +340,39 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) if (offnum != InvalidOffsetNumber) offnum = OffsetNumberPrev(offnum); /* move back */ else - offnum = maxoff; /* new page */ + { + /* new page, locate starting position by binary search */ + offnum = _hash_binsearch_last(page, so->hashso_sk_hash); + } - while (offnum < FirstOffsetNumber) + for (;;) { /* - * either this page is empty (offnum == - * InvalidOffsetNumber) or we ran off the end. + * check if we're still in the range of items with + * the target hash key + */ + if (offnum >= FirstOffsetNumber) + { + Assert(offnum <= maxoff); + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); + if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup)) + break; /* yes, so exit for-loop */ + } + + /* + * ran off the end of this page, try the next */ _hash_readprev(rel, &buf, &page, &opaque); if (BufferIsValid(buf)) - maxoff = offnum = PageGetMaxOffsetNumber(page); + { + maxoff = PageGetMaxOffsetNumber(page); + offnum = _hash_binsearch_last(page, so->hashso_sk_hash); + } else { /* end of bucket */ - maxoff = offnum = InvalidOffsetNumber; - break; /* exit while */ + itup = NULL; + break; /* exit for-loop */ } } break; @@ -347,19 +380,19 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) default: /* NoMovementScanDirection */ /* this should not be reached */ + itup = NULL; break; } - /* we ran off the end of the world without finding a match */ - if (offnum == InvalidOffsetNumber) + if (itup == NULL) { + /* we ran off the end of the bucket without finding a match */ *bufP = so->hashso_curbuf = InvalidBuffer; ItemPointerSetInvalid(current); return false; } - /* get ready to check this tuple */ - itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); + /* check the tuple quals, loop around if not met */ } while (!_hash_checkqual(scan, itup)); /* if we made it to here, we've found a valid tuple */ diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c index 29cdf24529a..7a1e3a8ad0b 100644 --- a/src/backend/access/hash/hashutil.c +++ b/src/backend/access/hash/hashutil.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashutil.c,v 1.56 2008/07/13 20:45:47 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashutil.c,v 1.57 2008/09/15 18:43:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,12 +28,21 @@ bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup) { + /* + * Currently, we can't check any of the scan conditions since we do + * not have the original index entry value to supply to the sk_func. + * Always return true; we expect that hashgettuple already set the + * recheck flag to make the main indexscan code do it. 
+ */ +#ifdef NOT_USED TupleDesc tupdesc = RelationGetDescr(scan->indexRelation); ScanKey key = scan->keyData; int scanKeySize = scan->numberOfKeys; +#endif IncrIndexProcessed(); +#ifdef NOT_USED while (scanKeySize > 0) { Datum datum; @@ -59,6 +68,7 @@ _hash_checkqual(IndexScanDesc scan, IndexTuple itup) key++; scanKeySize--; } +#endif return true; } @@ -190,7 +200,7 @@ _hash_checkpage(Relation rel, Buffer buf, int flags) */ if (flags == LH_META_PAGE) { - HashMetaPage metap = (HashMetaPage) page; + HashMetaPage metap = HashPageGetMeta(page); if (metap->hashm_magic != HASH_MAGIC) ereport(ERROR, @@ -221,3 +231,123 @@ hashoptions(PG_FUNCTION_ARGS) PG_RETURN_BYTEA_P(result); PG_RETURN_NULL(); } + +/* + * _hash_get_indextuple_hashkey - get the hash index tuple's hash key value + */ +uint32 +_hash_get_indextuple_hashkey(IndexTuple itup) +{ + char *attp; + + /* + * We assume the hash key is the first attribute and can't be null, + * so this can be done crudely but very very cheaply ... + */ + attp = (char *) itup + IndexInfoFindDataOffset(itup->t_info); + return *((uint32 *) attp); +} + +/* + * _hash_form_tuple - form an index tuple containing hash code only + */ +IndexTuple +_hash_form_tuple(Relation index, Datum *values, bool *isnull) +{ + IndexTuple itup; + uint32 hashkey; + Datum hashkeydatum; + TupleDesc hashdesc; + + if (isnull[0]) + hashkeydatum = (Datum) 0; + else + { + hashkey = _hash_datum2hashkey(index, values[0]); + hashkeydatum = UInt32GetDatum(hashkey); + } + hashdesc = RelationGetDescr(index); + Assert(hashdesc->natts == 1); + itup = index_form_tuple(hashdesc, &hashkeydatum, isnull); + return itup; +} + +/* + * _hash_binsearch - Return the offset number in the page where the + * specified hash value should be sought or inserted. + * + * We use binary search, relying on the assumption that the existing entries + * are ordered by hash key. + * + * Returns the offset of the first index entry having hashkey >= hash_value, + * or the page's max offset plus one if hash_value is greater than all + * existing hash keys in the page. This is the appropriate place to start + * a search, or to insert a new item. + */ +OffsetNumber +_hash_binsearch(Page page, uint32 hash_value) +{ + OffsetNumber upper; + OffsetNumber lower; + + /* Loop invariant: lower <= desired place <= upper */ + upper = PageGetMaxOffsetNumber(page) + 1; + lower = FirstOffsetNumber; + + while (upper > lower) + { + OffsetNumber off; + IndexTuple itup; + uint32 hashkey; + + off = (upper + lower) / 2; + Assert(OffsetNumberIsValid(off)); + + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); + hashkey = _hash_get_indextuple_hashkey(itup); + if (hashkey < hash_value) + lower = off + 1; + else + upper = off; + } + + return lower; +} + +/* + * _hash_binsearch_last + * + * Same as above, except that if there are multiple matching items in the + * page, we return the offset of the last one instead of the first one, + * and the possible range of outputs is 0..maxoffset not 1..maxoffset+1. + * This is handy for starting a new page in a backwards scan. 
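The two helpers introduced here assume page items are kept sorted by hash key: _hash_binsearch returns the first offset whose key is greater than or equal to the probe (which is also the insertion point _hash_pgaddtup uses), while _hash_binsearch_last, whose body follows this sketch, returns the last offset whose key is less than or equal to the probe, the natural starting point for a backward scan. Below is a self-contained sketch (not PostgreSQL code) of both contracts over a plain sorted array, plus the equal-range walk that the rewritten _hash_step performs; array indexes stand in for offset numbers.

/*
 * Illustrative sketch (not PostgreSQL code) of the search contracts used
 * on a hash page whose items are sorted by hash key.  Indexes 0..n-1
 * stand in for offset numbers 1..maxoffset.
 */
#include <stdio.h>
#include <stdint.h>

/* first index with keys[i] >= v, or n if none: insert point / scan start */
static int
binsearch_first(const uint32_t *keys, int n, uint32_t v)
{
    int         lower = 0, upper = n;

    while (upper > lower)
    {
        int         mid = (upper + lower) / 2;

        if (keys[mid] < v)
            lower = mid + 1;
        else
            upper = mid;
    }
    return lower;
}

/* last index with keys[i] <= v, or -1 if none: backward-scan start */
static int
binsearch_last(const uint32_t *keys, int n, uint32_t v)
{
    int         lower = -1, upper = n - 1;

    while (upper > lower)
    {
        int         mid = (upper + lower + 1) / 2;

        if (keys[mid] > v)
            upper = mid - 1;
        else
            lower = mid;
    }
    return lower;
}

int
main(void)
{
    uint32_t    keys[] = {5, 9, 9, 9, 14, 20};  /* sorted keys on a page */
    int         n = 6;
    uint32_t    target = 9;
    int         i = binsearch_first(keys, n, target);

    printf("first >= %u at index %d, last <= %u at index %d\n",
           (unsigned) target, i,
           (unsigned) target, binsearch_last(keys, n, target));

    /* forward equal-range walk, as in the rewritten _hash_step loop */
    for (; i < n && keys[i] == target; i++)
        printf("match at index %d\n", i);
    return 0;
}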
+ */ +OffsetNumber +_hash_binsearch_last(Page page, uint32 hash_value) +{ + OffsetNumber upper; + OffsetNumber lower; + + /* Loop invariant: lower <= desired place <= upper */ + upper = PageGetMaxOffsetNumber(page); + lower = FirstOffsetNumber - 1; + + while (upper > lower) + { + IndexTuple itup; + OffsetNumber off; + uint32 hashkey; + + off = (upper + lower + 1) / 2; + Assert(OffsetNumberIsValid(off)); + + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); + hashkey = _hash_get_indextuple_hashkey(itup); + if (hashkey > hash_value) + upper = off - 1; + else + lower = off; + } + + return lower; +} diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 1847f023e4a..301e7d1f2d5 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.303 2008/08/25 22:42:32 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.304 2008/09/15 18:43:41 tgl Exp $ * * * INTERFACE ROUTINES @@ -76,6 +76,7 @@ typedef struct /* non-export function prototypes */ static TupleDesc ConstructTupleDescriptor(Relation heapRelation, IndexInfo *indexInfo, + Oid accessMethodObjectId, Oid *classObjectId); static void InitializeAttributeOids(Relation indexRelation, int numatts, Oid indexoid); @@ -105,15 +106,28 @@ static Oid IndexGetRelation(Oid indexId); static TupleDesc ConstructTupleDescriptor(Relation heapRelation, IndexInfo *indexInfo, + Oid accessMethodObjectId, Oid *classObjectId) { int numatts = indexInfo->ii_NumIndexAttrs; ListCell *indexpr_item = list_head(indexInfo->ii_Expressions); + HeapTuple amtuple; + Form_pg_am amform; TupleDesc heapTupDesc; TupleDesc indexTupDesc; int natts; /* #atts in heap rel --- for error checks */ int i; + /* We need access to the index AM's pg_am tuple */ + amtuple = SearchSysCache(AMOID, + ObjectIdGetDatum(accessMethodObjectId), + 0, 0, 0); + if (!HeapTupleIsValid(amtuple)) + elog(ERROR, "cache lookup failed for access method %u", + accessMethodObjectId); + amform = (Form_pg_am) GETSTRUCT(amtuple); + + /* ... and to the table's tuple descriptor */ heapTupDesc = RelationGetDescr(heapRelation); natts = RelationGetForm(heapRelation)->relnatts; @@ -133,6 +147,7 @@ ConstructTupleDescriptor(Relation heapRelation, Form_pg_attribute to = indexTupDesc->attrs[i]; HeapTuple tuple; Form_pg_type typeTup; + Form_pg_opclass opclassTup; Oid keyType; if (atnum != 0) @@ -231,8 +246,8 @@ ConstructTupleDescriptor(Relation heapRelation, to->attrelid = InvalidOid; /* - * Check the opclass to see if it provides a keytype (overriding the - * attribute type). + * Check the opclass and index AM to see if either provides a keytype + * (overriding the attribute type). Opclass takes precedence. 
*/ tuple = SearchSysCache(CLAOID, ObjectIdGetDatum(classObjectId[i]), @@ -240,7 +255,11 @@ ConstructTupleDescriptor(Relation heapRelation, if (!HeapTupleIsValid(tuple)) elog(ERROR, "cache lookup failed for opclass %u", classObjectId[i]); - keyType = ((Form_pg_opclass) GETSTRUCT(tuple))->opckeytype; + opclassTup = (Form_pg_opclass) GETSTRUCT(tuple); + if (OidIsValid(opclassTup->opckeytype)) + keyType = opclassTup->opckeytype; + else + keyType = amform->amkeytype; ReleaseSysCache(tuple); if (OidIsValid(keyType) && keyType != to->atttypid) @@ -264,6 +283,8 @@ ConstructTupleDescriptor(Relation heapRelation, } } + ReleaseSysCache(amtuple); + return indexTupDesc; } @@ -577,6 +598,7 @@ index_create(Oid heapRelationId, */ indexTupDesc = ConstructTupleDescriptor(heapRelation, indexInfo, + accessMethodObjectId, classObjectId); /* diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 775840da185..29a076e1384 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -91,7 +91,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.86 2008/08/01 13:16:09 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.87 2008/09/15 18:43:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -101,7 +101,6 @@ #include #include "access/genam.h" -#include "access/hash.h" #include "access/nbtree.h" #include "catalog/pg_amop.h" #include "catalog/pg_operator.h" @@ -353,7 +352,6 @@ struct Tuplesortstate bool enforceUnique; /* complain if we find duplicate tuples */ /* These are specific to the index_hash subcase: */ - FmgrInfo *hash_proc; /* call info for the hash function */ uint32 hash_mask; /* mask for sortable part of hash code */ /* @@ -689,13 +687,6 @@ tuplesort_begin_index_hash(Relation indexRel, state->indexRel = indexRel; - /* - * We look up the index column's hash function just once, to avoid - * chewing lots of cycles in repeated index_getprocinfo calls. This - * assumes that our caller holds the index relation open throughout the - * sort, else the pointer obtained here might cease to be valid. - */ - state->hash_proc = index_getprocinfo(indexRel, 1, HASHPROC); state->hash_mask = hash_mask; MemoryContextSwitchTo(oldcontext); @@ -2821,11 +2812,6 @@ static int comparetup_index_hash(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) { - /* - * It's slightly annoying to redo the hash function each time, although - * most hash functions ought to be cheap. Is it worth having a variant - * tuple storage format so we can store the hash code? - */ uint32 hash1; uint32 hash2; IndexTuple tuple1; @@ -2834,13 +2820,14 @@ comparetup_index_hash(const SortTuple *a, const SortTuple *b, /* Allow interrupting long sorts */ CHECK_FOR_INTERRUPTS(); - /* Compute hash codes and mask off bits we don't want to sort by */ + /* + * Fetch hash keys and mask off bits we don't want to sort by. + * We know that the first column of the index tuple is the hash key. 
+ */ Assert(!a->isnull1); - hash1 = DatumGetUInt32(FunctionCall1(state->hash_proc, a->datum1)) - & state->hash_mask; + hash1 = DatumGetUInt32(a->datum1) & state->hash_mask; Assert(!b->isnull1); - hash2 = DatumGetUInt32(FunctionCall1(state->hash_proc, b->datum1)) - & state->hash_mask; + hash2 = DatumGetUInt32(b->datum1) & state->hash_mask; if (hash1 > hash2) return 1; diff --git a/src/include/access/hash.h b/src/include/access/hash.h index 0dab2b6ae91..e00176d4519 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.89 2008/07/13 20:45:47 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.90 2008/09/15 18:43:41 tgl Exp $ * * NOTES * modeled after Margo Seltzer's hash implementation for unix. @@ -75,6 +75,9 @@ typedef HashPageOpaqueData *HashPageOpaque; */ typedef struct HashScanOpaqueData { + /* Hash value of the scan key, ie, the hash key we seek */ + uint32 hashso_sk_hash; + /* * By definition, a hash scan should be examining only one bucket. We * record the bucket number here as soon as it is known. @@ -111,7 +114,7 @@ typedef HashScanOpaqueData *HashScanOpaque; #define HASH_METAPAGE 0 /* metapage is always block 0 */ #define HASH_MAGIC 0x6440640 -#define HASH_VERSION 1 /* new for Pg 7.4 */ +#define HASH_VERSION 2 /* 2 signifies only hash key value is stored */ /* * Spares[] holds the number of overflow pages currently allocated at or @@ -138,7 +141,6 @@ typedef HashScanOpaqueData *HashScanOpaque; typedef struct HashMetaPageData { - PageHeaderData hashm_phdr; /* pad for page header (do not use) */ uint32 hashm_magic; /* magic no. for hash tables */ uint32 hashm_version; /* version ID */ double hashm_ntuples; /* number of tuples stored in the table */ @@ -191,8 +193,16 @@ typedef HashMetaPageData *HashMetaPage; #define BMPGSZ_BIT(metap) ((metap)->hashm_bmsize << BYTE_TO_BIT) #define BMPG_SHIFT(metap) ((metap)->hashm_bmshift) #define BMPG_MASK(metap) (BMPGSZ_BIT(metap) - 1) -#define HashPageGetBitmap(pg) \ - ((uint32 *) (((char *) (pg)) + MAXALIGN(sizeof(PageHeaderData)))) + +#define HashPageGetBitmap(page) \ + ((uint32 *) PageGetContents(page)) + +#define HashGetMaxBitmapSize(page) \ + (PageGetPageSize((Page) page) - \ + (MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(HashPageOpaqueData)))) + +#define HashPageGetMeta(page) \ + ((HashMetaPage) PageGetContents(page)) /* * The number of bits in an ovflpage bitmap word. 
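comparetup_index_hash above no longer recomputes the hash per comparison; it fetches the stored code from datum1 and masks off the bits that do not affect the initial bucket assignment, so the sorted build path in hashbuild groups tuples by target bucket. Below is a small standalone sketch of sorting hash codes by their masked value; the mask constant is illustrative, not a real hash_mask derived from the bucket count.

/*
 * Illustrative sketch (not PostgreSQL code): sort 32-bit hash codes by the
 * masked part that determines the target bucket, as comparetup_index_hash
 * does with hash_mask during a sorted hash-index build.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

static uint32_t sort_mask = 0x7;        /* illustrative stand-in for hash_mask */

static int
cmp_masked(const void *pa, const void *pb)
{
    uint32_t    a = *(const uint32_t *) pa & sort_mask;
    uint32_t    b = *(const uint32_t *) pb & sort_mask;

    if (a > b)
        return 1;
    if (a < b)
        return -1;
    return 0;
}

int
main(void)
{
    uint32_t    hashes[] = {0x30u, 0x1fu, 0x22u, 0x05u, 0x18u};
    int         n = 5;

    qsort(hashes, n, sizeof(uint32_t), cmp_masked);

    for (int i = 0; i < n; i++)
        printf("hash 0x%02x -> masked value %u\n",
               (unsigned) hashes[i], (unsigned) (hashes[i] & sort_mask));
    return 0;
}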
@@ -330,6 +340,11 @@ extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket, uint32 highmask, uint32 lowmask); extern uint32 _hash_log2(uint32 num); extern void _hash_checkpage(Relation rel, Buffer buf, int flags); +extern uint32 _hash_get_indextuple_hashkey(IndexTuple itup); +extern IndexTuple _hash_form_tuple(Relation index, + Datum *values, bool *isnull); +extern OffsetNumber _hash_binsearch(Page page, uint32 hash_value); +extern OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value); /* hash.c */ extern void hash_redo(XLogRecPtr lsn, XLogRecord *record); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 6e4b4d40f9d..bd08779e713 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.485 2008/09/10 18:09:20 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.486 2008/09/15 18:43:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200809101 +#define CATALOG_VERSION_NO 200809151 #endif diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index 712a409633d..a7a638e083b 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.57 2008/07/11 21:06:29 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.58 2008/09/15 18:43:41 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -48,6 +48,7 @@ CATALOG(pg_am,2601) bool amsearchnulls; /* can AM search for NULL index entries? */ bool amstorage; /* can storage type differ from column type? */ bool amclusterable; /* does AM support cluster command? 
*/ + Oid amkeytype; /* type of data in index, or InvalidOid */ regproc aminsert; /* "insert this tuple" function */ regproc ambeginscan; /* "start new scan" function */ regproc amgettuple; /* "next valid tuple" function */ @@ -74,7 +75,7 @@ typedef FormData_pg_am *Form_pg_am; * compiler constants for pg_am * ---------------- */ -#define Natts_pg_am 24 +#define Natts_pg_am 25 #define Anum_pg_am_amname 1 #define Anum_pg_am_amstrategies 2 #define Anum_pg_am_amsupport 3 @@ -86,35 +87,36 @@ typedef FormData_pg_am *Form_pg_am; #define Anum_pg_am_amsearchnulls 9 #define Anum_pg_am_amstorage 10 #define Anum_pg_am_amclusterable 11 -#define Anum_pg_am_aminsert 12 -#define Anum_pg_am_ambeginscan 13 -#define Anum_pg_am_amgettuple 14 -#define Anum_pg_am_amgetbitmap 15 -#define Anum_pg_am_amrescan 16 -#define Anum_pg_am_amendscan 17 -#define Anum_pg_am_ammarkpos 18 -#define Anum_pg_am_amrestrpos 19 -#define Anum_pg_am_ambuild 20 -#define Anum_pg_am_ambulkdelete 21 -#define Anum_pg_am_amvacuumcleanup 22 -#define Anum_pg_am_amcostestimate 23 -#define Anum_pg_am_amoptions 24 +#define Anum_pg_am_amkeytype 12 +#define Anum_pg_am_aminsert 13 +#define Anum_pg_am_ambeginscan 14 +#define Anum_pg_am_amgettuple 15 +#define Anum_pg_am_amgetbitmap 16 +#define Anum_pg_am_amrescan 17 +#define Anum_pg_am_amendscan 18 +#define Anum_pg_am_ammarkpos 19 +#define Anum_pg_am_amrestrpos 20 +#define Anum_pg_am_ambuild 21 +#define Anum_pg_am_ambulkdelete 22 +#define Anum_pg_am_amvacuumcleanup 23 +#define Anum_pg_am_amcostestimate 24 +#define Anum_pg_am_amoptions 25 /* ---------------- * initial contents of pg_am * ---------------- */ -DATA(insert OID = 403 ( btree 5 1 t t t t t t f t btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btvacuumcleanup btcostestimate btoptions )); +DATA(insert OID = 403 ( btree 5 1 t t t t t t f t 0 btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btvacuumcleanup btcostestimate btoptions )); DESCR("b-tree index access method"); #define BTREE_AM_OID 403 -DATA(insert OID = 405 ( hash 1 1 f f f f f f f f hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions )); +DATA(insert OID = 405 ( hash 1 1 f f f f f f f f 23 hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions )); DESCR("hash index access method"); #define HASH_AM_OID 405 -DATA(insert OID = 783 ( gist 0 7 f f t t t t t t gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); +DATA(insert OID = 783 ( gist 0 7 f f t t t t t t 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); DESCR("GiST index access method"); #define GIST_AM_OID 783 -DATA(insert OID = 2742 ( gin 0 5 f f t t f f t f gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); +DATA(insert OID = 2742 ( gin 0 5 f f t t f f t f 0 gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); DESCR("GIN 
index access method"); #define GIN_AM_OID 2742 diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h index f0cb23e2708..7c4d95003c8 100644 --- a/src/include/catalog/pg_opclass.h +++ b/src/include/catalog/pg_opclass.h @@ -28,7 +28,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_opclass.h,v 1.82 2008/06/24 17:58:27 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_opclass.h,v 1.83 2008/09/15 18:43:41 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -123,13 +123,13 @@ DATA(insert ( 403 macaddr_ops PGNSP PGUID 1984 829 t 0 )); DATA(insert ( 405 macaddr_ops PGNSP PGUID 1985 829 t 0 )); /* * Here's an ugly little hack to save space in the system catalog indexes. - * btree and hash don't ordinarily allow a storage type different from input - * type; but cstring and name are the same thing except for trailing padding, + * btree doesn't ordinarily allow a storage type different from input type; + * but cstring and name are the same thing except for trailing padding, * and we can safely omit that within an index entry. So we declare the - * opclasses for name as using cstring storage type. + * btree opclass for name as using cstring storage type. */ DATA(insert ( 403 name_ops PGNSP PGUID 1986 19 t 2275 )); -DATA(insert ( 405 name_ops PGNSP PGUID 1987 19 t 2275 )); +DATA(insert ( 405 name_ops PGNSP PGUID 1987 19 t 0 )); DATA(insert ( 403 numeric_ops PGNSP PGUID 1988 1700 t 0 )); DATA(insert ( 405 numeric_ops PGNSP PGUID 1998 1700 t 0 )); DATA(insert OID = 1981 ( 403 oid_ops PGNSP PGUID 1989 26 t 0 ));