mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			364 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			364 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*-------------------------------------------------------------------------
 | |
|  *
 | |
|  * hash.h
 | |
|  *	  header file for postgres hash access method implementation
 | |
|  *
 | |
|  *
 | |
|  * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
 | |
|  * Portions Copyright (c) 1994, Regents of the University of California
 | |
|  *
 | |
|  * src/include/access/hash.h
 | |
|  *
 | |
|  * NOTES
 | |
|  *		modeled after Margo Seltzer's hash implementation for unix.
 | |
|  *
 | |
|  *-------------------------------------------------------------------------
 | |
|  */
 | |
| #ifndef HASH_H
 | |
| #define HASH_H
 | |
| 
 | |
| #include "access/genam.h"
 | |
| #include "access/itup.h"
 | |
| #include "access/sdir.h"
 | |
| #include "access/xlogreader.h"
 | |
| #include "fmgr.h"
 | |
| #include "lib/stringinfo.h"
 | |
| #include "storage/bufmgr.h"
 | |
| #include "storage/lock.h"
 | |
| #include "utils/relcache.h"
 | |
| 
 | |
| /*
 | |
|  * Mapping from hash bucket number to physical block number of bucket's
 | |
|  * starting page.  Beware of multiple evaluations of argument!
 | |
|  */
 | |
| typedef uint32 Bucket;
 | |
| 
 | |
| #define BUCKET_TO_BLKNO(metap,B) \
 | |
| 		((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_log2((B)+1)-1] : 0)) + 1)
 | |
| 
 | |
| /*
 | |
|  * Special space for hash index pages.
 | |
|  *
 | |
|  * hasho_flag tells us which type of page we're looking at.  For
 | |
|  * example, knowing overflow pages from bucket pages is necessary
 | |
|  * information when you're deleting tuples from a page. If all the
 | |
|  * tuples are deleted from an overflow page, the overflow is made
 | |
|  * available to other buckets by calling _hash_freeovflpage(). If all
 | |
|  * the tuples are deleted from a bucket page, no additional action is
 | |
|  * necessary.
 | |
|  */
 | |
| #define LH_UNUSED_PAGE			(0)
 | |
| #define LH_OVERFLOW_PAGE		(1 << 0)
 | |
| #define LH_BUCKET_PAGE			(1 << 1)
 | |
| #define LH_BITMAP_PAGE			(1 << 2)
 | |
| #define LH_META_PAGE			(1 << 3)
 | |
| 
 | |
| typedef struct HashPageOpaqueData
 | |
| {
 | |
| 	BlockNumber hasho_prevblkno;	/* previous ovfl (or bucket) blkno */
 | |
| 	BlockNumber hasho_nextblkno;	/* next ovfl blkno */
 | |
| 	Bucket		hasho_bucket;	/* bucket number this pg belongs to */
 | |
| 	uint16		hasho_flag;		/* page type code, see above */
 | |
| 	uint16		hasho_page_id;	/* for identification of hash indexes */
 | |
| } HashPageOpaqueData;
 | |
| 
 | |
| typedef HashPageOpaqueData *HashPageOpaque;
 | |
| 
 | |
| /*
 | |
|  * The page ID is for the convenience of pg_filedump and similar utilities,
 | |
|  * which otherwise would have a hard time telling pages of different index
 | |
|  * types apart.  It should be the last 2 bytes on the page.  This is more or
 | |
|  * less "free" due to alignment considerations.
 | |
|  */
 | |
| #define HASHO_PAGE_ID		0xFF80
 | |
| 
 | |
| /*
 | |
|  *	HashScanOpaqueData is private state for a hash index scan.
 | |
|  */
 | |
| typedef struct HashScanOpaqueData
 | |
| {
 | |
| 	/* Hash value of the scan key, ie, the hash key we seek */
 | |
| 	uint32		hashso_sk_hash;
 | |
| 
 | |
| 	/*
 | |
| 	 * By definition, a hash scan should be examining only one bucket. We
 | |
| 	 * record the bucket number here as soon as it is known.
 | |
| 	 */
 | |
| 	Bucket		hashso_bucket;
 | |
| 	bool		hashso_bucket_valid;
 | |
| 
 | |
| 	/*
 | |
| 	 * If we have a share lock on the bucket, we record it here.  When
 | |
| 	 * hashso_bucket_blkno is zero, we have no such lock.
 | |
| 	 */
 | |
| 	BlockNumber hashso_bucket_blkno;
 | |
| 
 | |
| 	/*
 | |
| 	 * We also want to remember which buffer we're currently examining in the
 | |
| 	 * scan. We keep the buffer pinned (but not locked) across hashgettuple
 | |
| 	 * calls, in order to avoid doing a ReadBuffer() for every tuple in the
 | |
| 	 * index.
 | |
| 	 */
 | |
| 	Buffer		hashso_curbuf;
 | |
| 
 | |
| 	/* Current position of the scan, as an index TID */
 | |
| 	ItemPointerData hashso_curpos;
 | |
| 
 | |
| 	/* Current position of the scan, as a heap TID */
 | |
| 	ItemPointerData hashso_heappos;
 | |
| } HashScanOpaqueData;
 | |
| 
 | |
| typedef HashScanOpaqueData *HashScanOpaque;
 | |
| 
 | |
| /*
 | |
|  * Definitions for metapage.
 | |
|  */
 | |
| 
 | |
| #define HASH_METAPAGE	0		/* metapage is always block 0 */
 | |
| 
 | |
| #define HASH_MAGIC		0x6440640
 | |
| #define HASH_VERSION	2		/* 2 signifies only hash key value is stored */
 | |
| 
 | |
| /*
 | |
|  * Spares[] holds the number of overflow pages currently allocated at or
 | |
|  * before a certain splitpoint. For example, if spares[3] = 7 then there are
 | |
|  * 7 ovflpages before splitpoint 3 (compare BUCKET_TO_BLKNO macro).  The
 | |
|  * value in spares[ovflpoint] increases as overflow pages are added at the
 | |
|  * end of the index.  Once ovflpoint increases (ie, we have actually allocated
 | |
|  * the bucket pages belonging to that splitpoint) the number of spares at the
 | |
|  * prior splitpoint cannot change anymore.
 | |
|  *
 | |
|  * ovflpages that have been recycled for reuse can be found by looking at
 | |
|  * bitmaps that are stored within ovflpages dedicated for the purpose.
 | |
|  * The blknos of these bitmap pages are kept in bitmaps[]; nmaps is the
 | |
|  * number of currently existing bitmaps.
 | |
|  *
 | |
|  * The limitation on the size of spares[] comes from the fact that there's
 | |
|  * no point in having more than 2^32 buckets with only uint32 hashcodes.
 | |
|  * There is no particular upper limit on the size of mapp[], other than
 | |
|  * needing to fit into the metapage.  (With 8K block size, 128 bitmaps
 | |
|  * limit us to 64 Gb of overflow space...)
 | |
|  */
 | |
| #define HASH_MAX_SPLITPOINTS		32
 | |
| #define HASH_MAX_BITMAPS			128
 | |
| 
 | |
| typedef struct HashMetaPageData
 | |
| {
 | |
| 	uint32		hashm_magic;	/* magic no. for hash tables */
 | |
| 	uint32		hashm_version;	/* version ID */
 | |
| 	double		hashm_ntuples;	/* number of tuples stored in the table */
 | |
| 	uint16		hashm_ffactor;	/* target fill factor (tuples/bucket) */
 | |
| 	uint16		hashm_bsize;	/* index page size (bytes) */
 | |
| 	uint16		hashm_bmsize;	/* bitmap array size (bytes) - must be a power
 | |
| 								 * of 2 */
 | |
| 	uint16		hashm_bmshift;	/* log2(bitmap array size in BITS) */
 | |
| 	uint32		hashm_maxbucket;	/* ID of maximum bucket in use */
 | |
| 	uint32		hashm_highmask; /* mask to modulo into entire table */
 | |
| 	uint32		hashm_lowmask;	/* mask to modulo into lower half of table */
 | |
| 	uint32		hashm_ovflpoint;/* splitpoint from which ovflpgs being
 | |
| 								 * allocated */
 | |
| 	uint32		hashm_firstfree;	/* lowest-number free ovflpage (bit#) */
 | |
| 	uint32		hashm_nmaps;	/* number of bitmap pages */
 | |
| 	RegProcedure hashm_procid;	/* hash procedure id from pg_proc */
 | |
| 	uint32		hashm_spares[HASH_MAX_SPLITPOINTS];		/* spare pages before
 | |
| 														 * each splitpoint */
 | |
| 	BlockNumber hashm_mapp[HASH_MAX_BITMAPS];	/* blknos of ovfl bitmaps */
 | |
| } HashMetaPageData;
 | |
| 
 | |
| typedef HashMetaPageData *HashMetaPage;
 | |
| 
 | |
| /*
 | |
|  * Maximum size of a hash index item (it's okay to have only one per page)
 | |
|  */
 | |
| #define HashMaxItemSize(page) \
 | |
| 	MAXALIGN_DOWN(PageGetPageSize(page) - \
 | |
| 				  SizeOfPageHeaderData - \
 | |
| 				  sizeof(ItemIdData) - \
 | |
| 				  MAXALIGN(sizeof(HashPageOpaqueData)))
 | |
| 
 | |
| #define HASH_MIN_FILLFACTOR			10
 | |
| #define HASH_DEFAULT_FILLFACTOR		75
 | |
| 
 | |
| /*
 | |
|  * Constants
 | |
|  */
 | |
| #define BYTE_TO_BIT				3		/* 2^3 bits/byte */
 | |
| #define ALL_SET					((uint32) ~0)
 | |
| 
 | |
| /*
 | |
|  * Bitmap pages do not contain tuples.  They do contain the standard
 | |
|  * page headers and trailers; however, everything in between is a
 | |
|  * giant bit array.  The number of bits that fit on a page obviously
 | |
|  * depends on the page size and the header/trailer overhead.  We require
 | |
|  * the number of bits per page to be a power of 2.
 | |
|  */
 | |
| #define BMPGSZ_BYTE(metap)		((metap)->hashm_bmsize)
 | |
| #define BMPGSZ_BIT(metap)		((metap)->hashm_bmsize << BYTE_TO_BIT)
 | |
| #define BMPG_SHIFT(metap)		((metap)->hashm_bmshift)
 | |
| #define BMPG_MASK(metap)		(BMPGSZ_BIT(metap) - 1)
 | |
| 
 | |
| #define HashPageGetBitmap(page) \
 | |
| 	((uint32 *) PageGetContents(page))
 | |
| 
 | |
| #define HashGetMaxBitmapSize(page) \
 | |
| 	(PageGetPageSize((Page) page) - \
 | |
| 	 (MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(HashPageOpaqueData))))
 | |
| 
 | |
| #define HashPageGetMeta(page) \
 | |
| 	((HashMetaPage) PageGetContents(page))
 | |
| 
 | |
| /*
 | |
|  * The number of bits in an ovflpage bitmap word.
 | |
|  */
 | |
| #define BITS_PER_MAP	32		/* Number of bits in uint32 */
 | |
| 
 | |
| /* Given the address of the beginning of a bit map, clear/set the nth bit */
 | |
| #define CLRBIT(A, N)	((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))
 | |
| #define SETBIT(A, N)	((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))
 | |
| #define ISSET(A, N)		((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))
 | |
| 
 | |
| /*
 | |
|  * page-level and high-level locking modes (see README)
 | |
|  */
 | |
| #define HASH_READ		BUFFER_LOCK_SHARE
 | |
| #define HASH_WRITE		BUFFER_LOCK_EXCLUSIVE
 | |
| #define HASH_NOLOCK		(-1)
 | |
| 
 | |
| #define HASH_SHARE		ShareLock
 | |
| #define HASH_EXCLUSIVE	ExclusiveLock
 | |
| 
 | |
| /*
 | |
|  *	Strategy number. There's only one valid strategy for hashing: equality.
 | |
|  */
 | |
| #define HTEqualStrategyNumber			1
 | |
| #define HTMaxStrategyNumber				1
 | |
| 
 | |
| /*
 | |
|  *	When a new operator class is declared, we require that the user supply
 | |
|  *	us with an amproc procudure for hashing a key of the new type.
 | |
|  *	Since we only have one such proc in amproc, it's number 1.
 | |
|  */
 | |
| #define HASHPROC		1
 | |
| 
 | |
| 
 | |
| /* public routines */
 | |
| 
 | |
| extern Datum hashbuild(PG_FUNCTION_ARGS);
 | |
| extern Datum hashbuildempty(PG_FUNCTION_ARGS);
 | |
| extern Datum hashinsert(PG_FUNCTION_ARGS);
 | |
| extern Datum hashbeginscan(PG_FUNCTION_ARGS);
 | |
| extern Datum hashgettuple(PG_FUNCTION_ARGS);
 | |
| extern Datum hashgetbitmap(PG_FUNCTION_ARGS);
 | |
| extern Datum hashrescan(PG_FUNCTION_ARGS);
 | |
| extern Datum hashendscan(PG_FUNCTION_ARGS);
 | |
| extern Datum hashmarkpos(PG_FUNCTION_ARGS);
 | |
| extern Datum hashrestrpos(PG_FUNCTION_ARGS);
 | |
| extern Datum hashbulkdelete(PG_FUNCTION_ARGS);
 | |
| extern Datum hashvacuumcleanup(PG_FUNCTION_ARGS);
 | |
| extern Datum hashoptions(PG_FUNCTION_ARGS);
 | |
| 
 | |
| /*
 | |
|  * Datatype-specific hash functions in hashfunc.c.
 | |
|  *
 | |
|  * These support both hash indexes and hash joins.
 | |
|  *
 | |
|  * NOTE: some of these are also used by catcache operations, without
 | |
|  * any direct connection to hash indexes.  Also, the common hash_any
 | |
|  * routine is also used by dynahash tables.
 | |
|  */
 | |
| extern Datum hashchar(PG_FUNCTION_ARGS);
 | |
| extern Datum hashint2(PG_FUNCTION_ARGS);
 | |
| extern Datum hashint4(PG_FUNCTION_ARGS);
 | |
| extern Datum hashint8(PG_FUNCTION_ARGS);
 | |
| extern Datum hashoid(PG_FUNCTION_ARGS);
 | |
| extern Datum hashenum(PG_FUNCTION_ARGS);
 | |
| extern Datum hashfloat4(PG_FUNCTION_ARGS);
 | |
| extern Datum hashfloat8(PG_FUNCTION_ARGS);
 | |
| extern Datum hashoidvector(PG_FUNCTION_ARGS);
 | |
| extern Datum hashint2vector(PG_FUNCTION_ARGS);
 | |
| extern Datum hashname(PG_FUNCTION_ARGS);
 | |
| extern Datum hashtext(PG_FUNCTION_ARGS);
 | |
| extern Datum hashvarlena(PG_FUNCTION_ARGS);
 | |
| extern Datum hash_any(register const unsigned char *k, register int keylen);
 | |
| extern Datum hash_uint32(uint32 k);
 | |
| 
 | |
| /* private routines */
 | |
| 
 | |
| /* hashinsert.c */
 | |
| extern void _hash_doinsert(Relation rel, IndexTuple itup);
 | |
| extern OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf,
 | |
| 			   Size itemsize, IndexTuple itup);
 | |
| 
 | |
| /* hashovfl.c */
 | |
| extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf);
 | |
| extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf,
 | |
| 				   BufferAccessStrategy bstrategy);
 | |
| extern void _hash_initbitmap(Relation rel, HashMetaPage metap,
 | |
| 				 BlockNumber blkno, ForkNumber forkNum);
 | |
| extern void _hash_squeezebucket(Relation rel,
 | |
| 					Bucket bucket, BlockNumber bucket_blkno,
 | |
| 					BufferAccessStrategy bstrategy);
 | |
| 
 | |
| /* hashpage.c */
 | |
| extern void _hash_getlock(Relation rel, BlockNumber whichlock, int access);
 | |
| extern bool _hash_try_getlock(Relation rel, BlockNumber whichlock, int access);
 | |
| extern void _hash_droplock(Relation rel, BlockNumber whichlock, int access);
 | |
| extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno,
 | |
| 			 int access, int flags);
 | |
| extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno);
 | |
| extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno,
 | |
| 				ForkNumber forkNum);
 | |
| extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
 | |
| 						   int access, int flags,
 | |
| 						   BufferAccessStrategy bstrategy);
 | |
| extern void _hash_relbuf(Relation rel, Buffer buf);
 | |
| extern void _hash_dropbuf(Relation rel, Buffer buf);
 | |
| extern void _hash_wrtbuf(Relation rel, Buffer buf);
 | |
| extern void _hash_chgbufaccess(Relation rel, Buffer buf, int from_access,
 | |
| 				   int to_access);
 | |
| extern uint32 _hash_metapinit(Relation rel, double num_tuples,
 | |
| 				ForkNumber forkNum);
 | |
| extern void _hash_pageinit(Page page, Size size);
 | |
| extern void _hash_expandtable(Relation rel, Buffer metabuf);
 | |
| 
 | |
| /* hashscan.c */
 | |
| extern void _hash_regscan(IndexScanDesc scan);
 | |
| extern void _hash_dropscan(IndexScanDesc scan);
 | |
| extern bool _hash_has_active_scan(Relation rel, Bucket bucket);
 | |
| extern void ReleaseResources_hash(void);
 | |
| 
 | |
| /* hashsearch.c */
 | |
| extern bool _hash_next(IndexScanDesc scan, ScanDirection dir);
 | |
| extern bool _hash_first(IndexScanDesc scan, ScanDirection dir);
 | |
| extern bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir);
 | |
| 
 | |
| /* hashsort.c */
 | |
| typedef struct HSpool HSpool;	/* opaque struct in hashsort.c */
 | |
| 
 | |
| extern HSpool *_h_spoolinit(Relation heap, Relation index, uint32 num_buckets);
 | |
| extern void _h_spooldestroy(HSpool *hspool);
 | |
| extern void _h_spool(HSpool *hspool, ItemPointer self,
 | |
| 					 Datum *values, bool *isnull);
 | |
| extern void _h_indexbuild(HSpool *hspool);
 | |
| 
 | |
| /* hashutil.c */
 | |
| extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup);
 | |
| extern uint32 _hash_datum2hashkey(Relation rel, Datum key);
 | |
| extern uint32 _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype);
 | |
| extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
 | |
| 					 uint32 highmask, uint32 lowmask);
 | |
| extern uint32 _hash_log2(uint32 num);
 | |
| extern void _hash_checkpage(Relation rel, Buffer buf, int flags);
 | |
| extern uint32 _hash_get_indextuple_hashkey(IndexTuple itup);
 | |
| extern IndexTuple _hash_form_tuple(Relation index,
 | |
| 				 Datum *values, bool *isnull);
 | |
| extern OffsetNumber _hash_binsearch(Page page, uint32 hash_value);
 | |
| extern OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value);
 | |
| 
 | |
| /* hash.c */
 | |
| extern void hash_redo(XLogReaderState *record);
 | |
| extern void hash_desc(StringInfo buf, XLogReaderState *record);
 | |
| extern const char *hash_identify(uint8 info);
 | |
| 
 | |
| #endif   /* HASH_H */
 |