mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Use perfect hashing, instead of binary search, for keyword lookup.
We've been speculating for a long time that hash-based keyword lookup
ought to be faster than binary search, but up to now we hadn't found
a suitable tool for generating the hash function.  Joerg Sonnenberger
provided the inspiration, and sample code, to show us that rolling our
own generator wasn't a ridiculous idea.  Hence, do that.
The method used here requires a lookup table of approximately 4 bytes
per keyword, but that's less than what we saved in the predecessor commit
afb0d0712, so it's not a big problem.  The time savings is indeed
significant: preliminary testing suggests that the total time for raw
parsing (flex + bison phases) drops by ~20%.
Patch by me, but it owes its existence to Joerg Sonnenberger;
thanks also to John Naylor for review.
Discussion: https://postgr.es/m/20190103163340.GA15803@britannica.bec.de
			
			
This commit is contained in:
		| @@ -63,6 +63,11 @@ OBJS_FRONTEND = $(OBJS_COMMON) fe_memutils.o file_utils.o restricted_token.o | |||||||
| OBJS_SHLIB = $(OBJS_FRONTEND:%.o=%_shlib.o) | OBJS_SHLIB = $(OBJS_FRONTEND:%.o=%_shlib.o) | ||||||
| OBJS_SRV = $(OBJS_COMMON:%.o=%_srv.o) | OBJS_SRV = $(OBJS_COMMON:%.o=%_srv.o) | ||||||
|  |  | ||||||
|  | # where to find gen_keywordlist.pl and subsidiary files | ||||||
|  | TOOLSDIR = $(top_srcdir)/src/tools | ||||||
|  | GEN_KEYWORDLIST = $(PERL) -I $(TOOLSDIR) $(TOOLSDIR)/gen_keywordlist.pl | ||||||
|  | GEN_KEYWORDLIST_DEPS = $(TOOLSDIR)/gen_keywordlist.pl $(TOOLSDIR)/PerfectHash.pm | ||||||
|  |  | ||||||
| all: libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a | all: libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a | ||||||
|  |  | ||||||
| distprep: kwlist_d.h | distprep: kwlist_d.h | ||||||
| @@ -118,8 +123,8 @@ libpgcommon_srv.a: $(OBJS_SRV) | |||||||
| 	$(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@ | 	$(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@ | ||||||
|  |  | ||||||
| # generate SQL keyword lookup table to be included into keywords*.o. | # generate SQL keyword lookup table to be included into keywords*.o. | ||||||
| kwlist_d.h: $(top_srcdir)/src/include/parser/kwlist.h $(top_srcdir)/src/tools/gen_keywordlist.pl | kwlist_d.h: $(top_srcdir)/src/include/parser/kwlist.h $(GEN_KEYWORDLIST_DEPS) | ||||||
| 	$(PERL) $(top_srcdir)/src/tools/gen_keywordlist.pl --extern $< | 	$(GEN_KEYWORDLIST) --extern $< | ||||||
|  |  | ||||||
| # Dependencies of keywords*.o need to be managed explicitly to make sure | # Dependencies of keywords*.o need to be managed explicitly to make sure | ||||||
| # that you don't get broken parsing code, even in a non-enable-depend build. | # that you don't get broken parsing code, even in a non-enable-depend build. | ||||||
|   | |||||||
| @@ -35,60 +35,51 @@ | |||||||
|  * receive a different case-normalization mapping. |  * receive a different case-normalization mapping. | ||||||
|  */ |  */ | ||||||
| int | int | ||||||
| ScanKeywordLookup(const char *text, | ScanKeywordLookup(const char *str, | ||||||
| 				  const ScanKeywordList *keywords) | 				  const ScanKeywordList *keywords) | ||||||
| { | { | ||||||
| 	int			len, | 	size_t		len; | ||||||
| 				i; | 	int			h; | ||||||
| 	char		word[NAMEDATALEN]; | 	const char *kw; | ||||||
| 	const char *kw_string; |  | ||||||
| 	const uint16 *kw_offsets; |  | ||||||
| 	const uint16 *low; |  | ||||||
| 	const uint16 *high; |  | ||||||
|  |  | ||||||
| 	len = strlen(text); |  | ||||||
|  |  | ||||||
| 	if (len > keywords->max_kw_len) |  | ||||||
| 		return -1;				/* too long to be any keyword */ |  | ||||||
|  |  | ||||||
| 	/* We assume all keywords are shorter than NAMEDATALEN. */ |  | ||||||
| 	Assert(len < NAMEDATALEN); |  | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * Apply an ASCII-only downcasing.  We must not use tolower() since it may | 	 * Reject immediately if too long to be any keyword.  This saves useless | ||||||
| 	 * produce the wrong translation in some locales (eg, Turkish). | 	 * hashing and downcasing work on long strings. | ||||||
| 	 */ | 	 */ | ||||||
| 	for (i = 0; i < len; i++) | 	len = strlen(str); | ||||||
|  | 	if (len > keywords->max_kw_len) | ||||||
|  | 		return -1; | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Compute the hash function.  We assume it was generated to produce | ||||||
|  | 	 * case-insensitive results.  Since it's a perfect hash, we need only | ||||||
|  | 	 * match to the specific keyword it identifies. | ||||||
|  | 	 */ | ||||||
|  | 	h = keywords->hash(str, len); | ||||||
|  |  | ||||||
|  | 	/* An out-of-range result implies no match */ | ||||||
|  | 	if (h < 0 || h >= keywords->num_keywords) | ||||||
|  | 		return -1; | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Compare character-by-character to see if we have a match, applying an | ||||||
|  | 	 * ASCII-only downcasing to the input characters.  We must not use | ||||||
|  | 	 * tolower() since it may produce the wrong translation in some locales | ||||||
|  | 	 * (eg, Turkish). | ||||||
|  | 	 */ | ||||||
|  | 	kw = GetScanKeyword(h, keywords); | ||||||
|  | 	while (*str != '\0') | ||||||
| 	{ | 	{ | ||||||
| 		char		ch = text[i]; | 		char		ch = *str++; | ||||||
|  |  | ||||||
| 		if (ch >= 'A' && ch <= 'Z') | 		if (ch >= 'A' && ch <= 'Z') | ||||||
| 			ch += 'a' - 'A'; | 			ch += 'a' - 'A'; | ||||||
| 		word[i] = ch; | 		if (ch != *kw++) | ||||||
| 	} |  | ||||||
| 	word[len] = '\0'; |  | ||||||
|  |  | ||||||
| 	/* |  | ||||||
| 	 * Now do a binary search using plain strcmp() comparison. |  | ||||||
| 	 */ |  | ||||||
| 	kw_string = keywords->kw_string; |  | ||||||
| 	kw_offsets = keywords->kw_offsets; |  | ||||||
| 	low = kw_offsets; |  | ||||||
| 	high = kw_offsets + (keywords->num_keywords - 1); |  | ||||||
| 	while (low <= high) |  | ||||||
| 	{ |  | ||||||
| 		const uint16 *middle; |  | ||||||
| 		int			difference; |  | ||||||
|  |  | ||||||
| 		middle = low + (high - low) / 2; |  | ||||||
| 		difference = strcmp(kw_string + *middle, word); |  | ||||||
| 		if (difference == 0) |  | ||||||
| 			return middle - kw_offsets; |  | ||||||
| 		else if (difference < 0) |  | ||||||
| 			low = middle + 1; |  | ||||||
| 		else |  | ||||||
| 			high = middle - 1; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 			return -1; | 			return -1; | ||||||
|  | 	} | ||||||
|  | 	if (*kw != '\0') | ||||||
|  | 		return -1; | ||||||
|  |  | ||||||
|  | 	/* Success! */ | ||||||
|  | 	return h; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -14,6 +14,9 @@ | |||||||
| #ifndef KWLOOKUP_H | #ifndef KWLOOKUP_H | ||||||
| #define KWLOOKUP_H | #define KWLOOKUP_H | ||||||
|  |  | ||||||
|  | /* Hash function used by ScanKeywordLookup */ | ||||||
|  | typedef int (*ScanKeywordHashFunc) (const void *key, size_t keylen); | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * This struct contains the data needed by ScanKeywordLookup to perform a |  * This struct contains the data needed by ScanKeywordLookup to perform a | ||||||
|  * search within a set of keywords.  The contents are typically generated by |  * search within a set of keywords.  The contents are typically generated by | ||||||
| @@ -23,6 +26,7 @@ typedef struct ScanKeywordList | |||||||
| { | { | ||||||
| 	const char *kw_string;		/* all keywords in order, separated by \0 */ | 	const char *kw_string;		/* all keywords in order, separated by \0 */ | ||||||
| 	const uint16 *kw_offsets;	/* offsets to the start of each keyword */ | 	const uint16 *kw_offsets;	/* offsets to the start of each keyword */ | ||||||
|  | 	ScanKeywordHashFunc hash;	/* perfect hash function for keywords */ | ||||||
| 	int			num_keywords;	/* number of keywords */ | 	int			num_keywords;	/* number of keywords */ | ||||||
| 	int			max_kw_len;		/* length of longest keyword */ | 	int			max_kw_len;		/* length of longest keyword */ | ||||||
| } ScanKeywordList; | } ScanKeywordList; | ||||||
|   | |||||||
| @@ -21,8 +21,7 @@ | |||||||
| /* | /* | ||||||
|  * List of keyword (name, token-value, category) entries. |  * List of keyword (name, token-value, category) entries. | ||||||
|  * |  * | ||||||
|  * !!WARNING!!: This list must be sorted by ASCII name, because binary |  * Note: gen_keywordlist.pl requires the entries to appear in ASCII order. | ||||||
|  *		 search is used to locate entries. |  | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| /* name, value, category */ | /* name, value, category */ | ||||||
|   | |||||||
| @@ -28,7 +28,10 @@ OBJS=	preproc.o pgc.o type.o ecpg.o output.o parser.o \ | |||||||
| 	keywords.o c_keywords.o ecpg_keywords.o typename.o descriptor.o variable.o \ | 	keywords.o c_keywords.o ecpg_keywords.o typename.o descriptor.o variable.o \ | ||||||
| 	$(WIN32RES) | 	$(WIN32RES) | ||||||
|  |  | ||||||
| GEN_KEYWORDLIST = $(top_srcdir)/src/tools/gen_keywordlist.pl | # where to find gen_keywordlist.pl and subsidiary files | ||||||
|  | TOOLSDIR = $(top_srcdir)/src/tools | ||||||
|  | GEN_KEYWORDLIST = $(PERL) -I $(TOOLSDIR) $(TOOLSDIR)/gen_keywordlist.pl | ||||||
|  | GEN_KEYWORDLIST_DEPS = $(TOOLSDIR)/gen_keywordlist.pl $(TOOLSDIR)/PerfectHash.pm | ||||||
|  |  | ||||||
| # Suppress parallel build to avoid a bug in GNU make 3.82 | # Suppress parallel build to avoid a bug in GNU make 3.82 | ||||||
| # (see comments in ../Makefile) | # (see comments in ../Makefile) | ||||||
| @@ -56,11 +59,11 @@ preproc.y: ../../../backend/parser/gram.y parse.pl ecpg.addons ecpg.header ecpg. | |||||||
| 	$(PERL) $(srcdir)/check_rules.pl $(srcdir) $< | 	$(PERL) $(srcdir)/check_rules.pl $(srcdir) $< | ||||||
|  |  | ||||||
| # generate keyword headers | # generate keyword headers | ||||||
| c_kwlist_d.h: c_kwlist.h $(GEN_KEYWORDLIST) | c_kwlist_d.h: c_kwlist.h $(GEN_KEYWORDLIST_DEPS) | ||||||
| 	$(PERL) $(GEN_KEYWORDLIST) --varname ScanCKeywords $< | 	$(GEN_KEYWORDLIST) --varname ScanCKeywords --no-case-fold $< | ||||||
|  |  | ||||||
| ecpg_kwlist_d.h: ecpg_kwlist.h $(GEN_KEYWORDLIST) | ecpg_kwlist_d.h: ecpg_kwlist.h $(GEN_KEYWORDLIST_DEPS) | ||||||
| 	$(PERL) $(GEN_KEYWORDLIST) --varname ScanECPGKeywords $< | 	$(GEN_KEYWORDLIST) --varname ScanECPGKeywords $< | ||||||
|  |  | ||||||
| # Force these dependencies to be known even without dependency info built: | # Force these dependencies to be known even without dependency info built: | ||||||
| ecpg_keywords.o c_keywords.o keywords.o preproc.o pgc.o parser.o: preproc.h | ecpg_keywords.o c_keywords.o keywords.o preproc.o pgc.o parser.o: preproc.h | ||||||
|   | |||||||
| @@ -9,8 +9,6 @@ | |||||||
|  */ |  */ | ||||||
| #include "postgres_fe.h" | #include "postgres_fe.h" | ||||||
|  |  | ||||||
| #include <ctype.h> |  | ||||||
|  |  | ||||||
| #include "preproc_extern.h" | #include "preproc_extern.h" | ||||||
| #include "preproc.h" | #include "preproc.h" | ||||||
|  |  | ||||||
| @@ -32,39 +30,38 @@ static const uint16 ScanCKeywordTokens[] = { | |||||||
|  * |  * | ||||||
|  * Returns the token value of the keyword, or -1 if no match. |  * Returns the token value of the keyword, or -1 if no match. | ||||||
|  * |  * | ||||||
|  * Do a binary search using plain strcmp() comparison.  This is much like |  * Do a hash search using plain strcmp() comparison.  This is much like | ||||||
|  * ScanKeywordLookup(), except we want case-sensitive matching. |  * ScanKeywordLookup(), except we want case-sensitive matching. | ||||||
|  */ |  */ | ||||||
| int | int | ||||||
| ScanCKeywordLookup(const char *text) | ScanCKeywordLookup(const char *str) | ||||||
| { | { | ||||||
| 	const char *kw_string; | 	size_t		len; | ||||||
| 	const uint16 *kw_offsets; | 	int			h; | ||||||
| 	const uint16 *low; | 	const char *kw; | ||||||
| 	const uint16 *high; |  | ||||||
|  |  | ||||||
| 	if (strlen(text) > ScanCKeywords.max_kw_len) | 	/* | ||||||
| 		return -1;				/* too long to be any keyword */ | 	 * Reject immediately if too long to be any keyword.  This saves useless | ||||||
|  | 	 * hashing work on long strings. | ||||||
|  | 	 */ | ||||||
|  | 	len = strlen(str); | ||||||
|  | 	if (len > ScanCKeywords.max_kw_len) | ||||||
|  | 		return -1; | ||||||
|  |  | ||||||
| 	kw_string = ScanCKeywords.kw_string; | 	/* | ||||||
| 	kw_offsets = ScanCKeywords.kw_offsets; | 	 * Compute the hash function.  Since it's a perfect hash, we need only | ||||||
| 	low = kw_offsets; | 	 * match to the specific keyword it identifies. | ||||||
| 	high = kw_offsets + (ScanCKeywords.num_keywords - 1); | 	 */ | ||||||
|  | 	h = ScanCKeywords_hash_func(str, len); | ||||||
|  |  | ||||||
| 	while (low <= high) | 	/* An out-of-range result implies no match */ | ||||||
| 	{ | 	if (h < 0 || h >= ScanCKeywords.num_keywords) | ||||||
| 		const uint16 *middle; | 		return -1; | ||||||
| 		int			difference; |  | ||||||
|  |  | ||||||
| 		middle = low + (high - low) / 2; | 	kw = GetScanKeyword(h, &ScanCKeywords); | ||||||
| 		difference = strcmp(kw_string + *middle, text); |  | ||||||
| 		if (difference == 0) | 	if (strcmp(kw, str) == 0) | ||||||
| 			return ScanCKeywordTokens[middle - kw_offsets]; | 		return ScanCKeywordTokens[h]; | ||||||
| 		else if (difference < 0) |  | ||||||
| 			low = middle + 1; |  | ||||||
| 		else |  | ||||||
| 			high = middle - 1; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return -1; | 	return -1; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -20,8 +20,7 @@ | |||||||
| /* | /* | ||||||
|  * List of (keyword-name, keyword-token-value) pairs. |  * List of (keyword-name, keyword-token-value) pairs. | ||||||
|  * |  * | ||||||
|  * !!WARNING!!: This list must be sorted by ASCII name, because binary |  * Note: gen_keywordlist.pl requires the entries to appear in ASCII order. | ||||||
|  *		 search is used to locate entries. |  | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| /* name, value */ | /* name, value */ | ||||||
|   | |||||||
| @@ -20,8 +20,7 @@ | |||||||
| /* | /* | ||||||
|  * List of (keyword-name, keyword-token-value) pairs. |  * List of (keyword-name, keyword-token-value) pairs. | ||||||
|  * |  * | ||||||
|  * !!WARNING!!: This list must be sorted by ASCII name, because binary |  * Note: gen_keywordlist.pl requires the entries to appear in ASCII order. | ||||||
|  *		 search is used to locate entries. |  | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| /* name, value */ | /* name, value */ | ||||||
|   | |||||||
| @@ -29,7 +29,10 @@ REGRESS_OPTS = --dbname=$(PL_TESTDB) | |||||||
| REGRESS = plpgsql_call plpgsql_control plpgsql_domain plpgsql_record \ | REGRESS = plpgsql_call plpgsql_control plpgsql_domain plpgsql_record \ | ||||||
| 	plpgsql_cache plpgsql_transaction plpgsql_trigger plpgsql_varprops | 	plpgsql_cache plpgsql_transaction plpgsql_trigger plpgsql_varprops | ||||||
|  |  | ||||||
| GEN_KEYWORDLIST = $(top_srcdir)/src/tools/gen_keywordlist.pl | # where to find gen_keywordlist.pl and subsidiary files | ||||||
|  | TOOLSDIR = $(top_srcdir)/src/tools | ||||||
|  | GEN_KEYWORDLIST = $(PERL) -I $(TOOLSDIR) $(TOOLSDIR)/gen_keywordlist.pl | ||||||
|  | GEN_KEYWORDLIST_DEPS = $(TOOLSDIR)/gen_keywordlist.pl $(TOOLSDIR)/PerfectHash.pm | ||||||
|  |  | ||||||
| all: all-lib | all: all-lib | ||||||
|  |  | ||||||
| @@ -76,11 +79,11 @@ plerrcodes.h: $(top_srcdir)/src/backend/utils/errcodes.txt generate-plerrcodes.p | |||||||
| 	$(PERL) $(srcdir)/generate-plerrcodes.pl $< > $@ | 	$(PERL) $(srcdir)/generate-plerrcodes.pl $< > $@ | ||||||
|  |  | ||||||
| # generate keyword headers for the scanner | # generate keyword headers for the scanner | ||||||
| pl_reserved_kwlist_d.h: pl_reserved_kwlist.h $(GEN_KEYWORDLIST) | pl_reserved_kwlist_d.h: pl_reserved_kwlist.h $(GEN_KEYWORDLIST_DEPS) | ||||||
| 	$(PERL) $(GEN_KEYWORDLIST) --varname ReservedPLKeywords $< | 	$(GEN_KEYWORDLIST) --varname ReservedPLKeywords $< | ||||||
|  |  | ||||||
| pl_unreserved_kwlist_d.h: pl_unreserved_kwlist.h $(GEN_KEYWORDLIST) | pl_unreserved_kwlist_d.h: pl_unreserved_kwlist.h $(GEN_KEYWORDLIST_DEPS) | ||||||
| 	$(PERL) $(GEN_KEYWORDLIST) --varname UnreservedPLKeywords $< | 	$(GEN_KEYWORDLIST) --varname UnreservedPLKeywords $< | ||||||
|  |  | ||||||
|  |  | ||||||
| check: submake | check: submake | ||||||
|   | |||||||
| @@ -20,10 +20,9 @@ | |||||||
| /* | /* | ||||||
|  * List of (keyword-name, keyword-token-value) pairs. |  * List of (keyword-name, keyword-token-value) pairs. | ||||||
|  * |  * | ||||||
|  * Be careful not to put the same word in both lists. |  * Be careful not to put the same word into pl_unreserved_kwlist.h. | ||||||
|  * |  * | ||||||
|  * !!WARNING!!: This list must be sorted by ASCII name, because binary |  * Note: gen_keywordlist.pl requires the entries to appear in ASCII order. | ||||||
|  *		 search is used to locate entries. |  | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| /* name, value */ | /* name, value */ | ||||||
|   | |||||||
| @@ -20,11 +20,10 @@ | |||||||
| /* | /* | ||||||
|  * List of (keyword-name, keyword-token-value) pairs. |  * List of (keyword-name, keyword-token-value) pairs. | ||||||
|  * |  * | ||||||
|  * Be careful not to put the same word in both lists.  Also be sure that |  * Be careful not to put the same word into pl_reserved_kwlist.h.  Also be | ||||||
|  * pl_gram.y's unreserved_keyword production agrees with this list. |  * sure that pl_gram.y's unreserved_keyword production agrees with this list. | ||||||
|  * |  * | ||||||
|  * !!WARNING!!: This list must be sorted by ASCII name, because binary |  * Note: gen_keywordlist.pl requires the entries to appear in ASCII order. | ||||||
|  *		 search is used to locate entries. |  | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| /* name, value */ | /* name, value */ | ||||||
|   | |||||||
							
								
								
									
										376
									
								
								src/tools/PerfectHash.pm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										376
									
								
								src/tools/PerfectHash.pm
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,376 @@ | |||||||
|  | #---------------------------------------------------------------------- | ||||||
|  | # | ||||||
|  | # PerfectHash.pm | ||||||
|  | #    Perl module that constructs minimal perfect hash functions | ||||||
|  | # | ||||||
|  | # This code constructs a minimal perfect hash function for the given | ||||||
|  | # set of keys, using an algorithm described in | ||||||
|  | # "An optimal algorithm for generating minimal perfect hash functions" | ||||||
|  | # by Czech, Havas and Majewski in Information Processing Letters, | ||||||
|  | # 43(5):256-264, October 1992. | ||||||
|  | # This implementation is loosely based on NetBSD's "nbperf", | ||||||
|  | # which was written by Joerg Sonnenberger. | ||||||
|  | # | ||||||
|  | # The resulting hash function is perfect in the sense that if the presented | ||||||
|  | # key is one of the original set, it will return the key's index in the set | ||||||
|  | # (in range 0..N-1).  However, the caller must still verify the match, | ||||||
|  | # as false positives are possible.  Also, the hash function may return | ||||||
|  | # values that are out of range (negative or >= N), due to summing unrelated | ||||||
|  | # hashtable entries.  This indicates that the presented key is definitely | ||||||
|  | # not in the set. | ||||||
|  | # | ||||||
|  | # | ||||||
|  | # Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group | ||||||
|  | # Portions Copyright (c) 1994, Regents of the University of California | ||||||
|  | # | ||||||
|  | # src/tools/PerfectHash.pm | ||||||
|  | # | ||||||
|  | #---------------------------------------------------------------------- | ||||||
|  |  | ||||||
|  | package PerfectHash; | ||||||
|  |  | ||||||
|  | use strict; | ||||||
|  | use warnings; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # At runtime, we'll compute two simple hash functions of the input key, | ||||||
|  | # and use them to index into a mapping table.  The hash functions are just | ||||||
|  | # multiply-and-add in uint32 arithmetic, with different multipliers and | ||||||
|  | # initial seeds.  All the complexity in this module is concerned with | ||||||
|  | # selecting hash parameters that will work and building the mapping table. | ||||||
|  |  | ||||||
|  | # We support making case-insensitive hash functions, though this only | ||||||
|  | # works for a strict-ASCII interpretation of case insensitivity, | ||||||
|  | # ie, A-Z maps onto a-z and nothing else. | ||||||
|  | my $case_fold = 0; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # | ||||||
|  | # Construct a C function implementing a perfect hash for the given keys. | ||||||
|  | # The C function definition is returned as a string. | ||||||
|  | # | ||||||
|  | # The keys should be passed as an array reference.  They can be any set | ||||||
|  | # of Perl strings; it is caller's responsibility that there not be any | ||||||
|  | # duplicates.  (Note that the "strings" can be binary data, but hashing | ||||||
|  | # e.g. OIDs has endianness hazards that callers must overcome.) | ||||||
|  | # | ||||||
|  | # The name to use for the function is specified as the second argument. | ||||||
|  | # It will be a global function by default, but the caller may prepend | ||||||
|  | # "static " to the result string if it wants a static function. | ||||||
|  | # | ||||||
|  | # Additional options can be specified as keyword-style arguments: | ||||||
|  | # | ||||||
|  | # case_fold => bool | ||||||
|  | # If specified as true, the hash function is case-insensitive, for the | ||||||
|  | # limited idea of case-insensitivity explained above. | ||||||
|  | # | ||||||
|  | # fixed_key_length => N | ||||||
|  | # If specified, all keys are assumed to have length N bytes, and the | ||||||
|  | # hash function signature will be just "int f(const void *key)" | ||||||
|  | # rather than "int f(const void *key, size_t keylen)". | ||||||
|  | # | ||||||
|  | sub generate_hash_function | ||||||
|  | { | ||||||
|  | 	my ($keys_ref, $funcname, %options) = @_; | ||||||
|  |  | ||||||
|  | 	# It's not worth passing this around as a parameter; just use a global. | ||||||
|  | 	$case_fold = $options{case_fold} || 0; | ||||||
|  |  | ||||||
|  | 	# Try different hash function parameters until we find a set that works | ||||||
|  | 	# for these keys.  The multipliers are chosen to be primes that are cheap | ||||||
|  | 	# to calculate via shift-and-add, so don't change them without care. | ||||||
|  | 	# (Commonly, random seeds are tried, but we want reproducible results | ||||||
|  | 	# from this program so we don't do that.) | ||||||
|  | 	my $hash_mult1 = 31; | ||||||
|  | 	my $hash_mult2; | ||||||
|  | 	my $hash_seed1; | ||||||
|  | 	my $hash_seed2; | ||||||
|  | 	my @subresult; | ||||||
|  |   FIND_PARAMS: | ||||||
|  | 	foreach (127, 257, 521, 1033, 2053) | ||||||
|  | 	{ | ||||||
|  | 		$hash_mult2 = $_;    # "foreach $hash_mult2" doesn't work | ||||||
|  | 		for ($hash_seed1 = 0; $hash_seed1 < 10; $hash_seed1++) | ||||||
|  | 		{ | ||||||
|  | 			for ($hash_seed2 = 0; $hash_seed2 < 10; $hash_seed2++) | ||||||
|  | 			{ | ||||||
|  | 				@subresult = _construct_hash_table( | ||||||
|  | 					$keys_ref,   $hash_mult1, $hash_mult2, | ||||||
|  | 					$hash_seed1, $hash_seed2); | ||||||
|  | 				last FIND_PARAMS if @subresult; | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	# Choke if we couldn't find a workable set of parameters. | ||||||
|  | 	die "failed to generate perfect hash" if !@subresult; | ||||||
|  |  | ||||||
|  | 	# Extract info from _construct_hash_table's result array. | ||||||
|  | 	my $elemtype = $subresult[0]; | ||||||
|  | 	my @hashtab  = @{ $subresult[1] }; | ||||||
|  | 	my $nhash    = scalar(@hashtab); | ||||||
|  |  | ||||||
|  | 	# OK, construct the hash function definition including the hash table. | ||||||
|  | 	my $f = ''; | ||||||
|  | 	$f .= sprintf "int\n"; | ||||||
|  | 	if (defined $options{fixed_key_length}) | ||||||
|  | 	{ | ||||||
|  | 		$f .= sprintf "%s(const void *key)\n{\n", $funcname; | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		$f .= sprintf "%s(const void *key, size_t keylen)\n{\n", $funcname; | ||||||
|  | 	} | ||||||
|  | 	$f .= sprintf "\tstatic const %s h[%d] = {\n", $elemtype, $nhash; | ||||||
|  | 	for (my $i = 0; $i < $nhash; $i++) | ||||||
|  | 	{ | ||||||
|  | 		$f .= sprintf "%s%6d,%s", | ||||||
|  | 		  ($i % 8 == 0 ? "\t\t" : " "), | ||||||
|  | 		  $hashtab[$i], | ||||||
|  | 		  ($i % 8 == 7 ? "\n" : ""); | ||||||
|  | 	} | ||||||
|  | 	$f .= sprintf "\n" if ($nhash % 8 != 0); | ||||||
|  | 	$f .= sprintf "\t};\n\n"; | ||||||
|  | 	$f .= sprintf "\tconst unsigned char *k = key;\n"; | ||||||
|  | 	$f .= sprintf "\tsize_t\t\tkeylen = %d;\n", $options{fixed_key_length} | ||||||
|  | 	  if (defined $options{fixed_key_length}); | ||||||
|  | 	$f .= sprintf "\tuint32\t\ta = %d;\n",   $hash_seed1; | ||||||
|  | 	$f .= sprintf "\tuint32\t\tb = %d;\n\n", $hash_seed2; | ||||||
|  | 	$f .= sprintf "\twhile (keylen--)\n\t{\n"; | ||||||
|  | 	$f .= sprintf "\t\tunsigned char c = *k++"; | ||||||
|  | 	$f .= sprintf " | 0x20" if $case_fold;    # see comment below | ||||||
|  | 	$f .= sprintf ";\n\n"; | ||||||
|  | 	$f .= sprintf "\t\ta = a * %d + c;\n", $hash_mult1; | ||||||
|  | 	$f .= sprintf "\t\tb = b * %d + c;\n", $hash_mult2; | ||||||
|  | 	$f .= sprintf "\t}\n"; | ||||||
|  | 	$f .= sprintf "\treturn h[a %% %d] + h[b %% %d];\n", $nhash, $nhash; | ||||||
|  | 	$f .= sprintf "}\n"; | ||||||
|  |  | ||||||
|  | 	return $f; | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Calculate a hash function as the run-time code will do. | ||||||
|  | # | ||||||
|  | # If we are making a case-insensitive hash function, we implement that | ||||||
|  | # by OR'ing 0x20 into each byte of the key.  This correctly transforms | ||||||
|  | # upper-case ASCII into lower-case ASCII, while not changing digits or | ||||||
|  | # dollar signs.  (It does change '_', as well as other characters not | ||||||
|  | # likely to appear in keywords; this has little effect on the hash's | ||||||
|  | # ability to discriminate keywords.) | ||||||
|  | sub _calc_hash | ||||||
|  | { | ||||||
|  | 	my ($key, $mult, $seed) = @_; | ||||||
|  |  | ||||||
|  | 	my $result = $seed; | ||||||
|  | 	for my $c (split //, $key) | ||||||
|  | 	{ | ||||||
|  | 		my $cn = ord($c); | ||||||
|  | 		$cn |= 0x20 if $case_fold; | ||||||
|  | 		$result = ($result * $mult + $cn) % 4294967296; | ||||||
|  | 	} | ||||||
|  | 	return $result; | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Attempt to construct a mapping table for a minimal perfect hash function | ||||||
|  | # for the given keys, using the specified hash parameters. | ||||||
|  | # | ||||||
|  | # Returns an array containing the mapping table element type name as the | ||||||
|  | # first element, and a ref to an array of the table values as the second. | ||||||
|  | # | ||||||
|  | # Returns an empty array on failure; then caller should choose different | ||||||
|  | # hash parameter(s) and try again. | ||||||
|  | sub _construct_hash_table | ||||||
|  | { | ||||||
|  | 	my ($keys_ref, $hash_mult1, $hash_mult2, $hash_seed1, $hash_seed2) = @_; | ||||||
|  | 	my @keys = @{$keys_ref}; | ||||||
|  |  | ||||||
|  | 	# This algorithm is based on a graph whose edges correspond to the | ||||||
|  | 	# keys and whose vertices correspond to entries of the mapping table. | ||||||
|  | 	# A key's edge links the two vertices whose indexes are the outputs of | ||||||
|  | 	# the two hash functions for that key.  For K keys, the mapping | ||||||
|  | 	# table must have at least 2*K+1 entries, guaranteeing that there's at | ||||||
|  | 	# least one unused entry.  (In principle, larger mapping tables make it | ||||||
|  | 	# easier to find a workable hash and increase the number of inputs that | ||||||
|  | 	# can be rejected due to touching unused hashtable entries.  In practice, | ||||||
|  | 	# neither effect seems strong enough to justify using a larger table.) | ||||||
|  | 	my $nedges = scalar @keys;       # number of edges | ||||||
|  | 	my $nverts = 2 * $nedges + 1;    # number of vertices | ||||||
|  |  | ||||||
|  | 	# However, it would be very bad if $nverts were exactly equal to either | ||||||
|  | 	# $hash_mult1 or $hash_mult2: effectively, that hash function would be | ||||||
|  | 	# sensitive to only the last byte of each key.  Cases where $nverts is a | ||||||
|  | 	# multiple of either multiplier likewise lose information.  (But $nverts | ||||||
|  | 	# can't actually divide them, if they've been intelligently chosen as | ||||||
|  | 	# primes.)  We can avoid such problems by adjusting the table size. | ||||||
|  | 	while ($nverts % $hash_mult1 == 0 | ||||||
|  | 		|| $nverts % $hash_mult2 == 0) | ||||||
|  | 	{ | ||||||
|  | 		$nverts++; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	# Initialize the array of edges. | ||||||
|  | 	my @E = (); | ||||||
|  | 	foreach my $kw (@keys) | ||||||
|  | 	{ | ||||||
|  | 		# Calculate hashes for this key. | ||||||
|  | 		# The hashes are immediately reduced modulo the mapping table size. | ||||||
|  | 		my $hash1 = _calc_hash($kw, $hash_mult1, $hash_seed1) % $nverts; | ||||||
|  | 		my $hash2 = _calc_hash($kw, $hash_mult2, $hash_seed2) % $nverts; | ||||||
|  |  | ||||||
|  | 		# If the two hashes are the same for any key, we have to fail | ||||||
|  | 		# since this edge would itself form a cycle in the graph. | ||||||
|  | 		return () if $hash1 == $hash2; | ||||||
|  |  | ||||||
|  | 		# Add the edge for this key. | ||||||
|  | 		push @E, { left => $hash1, right => $hash2 }; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	# Initialize the array of vertices, giving them all empty lists | ||||||
|  | 	# of associated edges.  (The lists will be hashes of edge numbers.) | ||||||
|  | 	my @V = (); | ||||||
|  | 	for (my $v = 0; $v < $nverts; $v++) | ||||||
|  | 	{ | ||||||
|  | 		push @V, { edges => {} }; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	# Insert each edge in the lists of edges connected to its vertices. | ||||||
|  | 	for (my $e = 0; $e < $nedges; $e++) | ||||||
|  | 	{ | ||||||
|  | 		my $v = $E[$e]{left}; | ||||||
|  | 		$V[$v]{edges}->{$e} = 1; | ||||||
|  |  | ||||||
|  | 		$v = $E[$e]{right}; | ||||||
|  | 		$V[$v]{edges}->{$e} = 1; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	# Now we attempt to prove the graph acyclic. | ||||||
|  | 	# A cycle-free graph is either empty or has some vertex of degree 1. | ||||||
|  | 	# Removing the edge attached to that vertex doesn't change this property, | ||||||
|  | 	# so doing that repeatedly will reduce the size of the graph. | ||||||
|  | 	# If the graph is empty at the end of the process, it was acyclic. | ||||||
|  | 	# We track the order of edge removal so that the next phase can process | ||||||
|  | 	# them in reverse order of removal. | ||||||
|  | 	my @output_order = (); | ||||||
|  |  | ||||||
|  | 	# Consider each vertex as a possible starting point for edge-removal. | ||||||
|  | 	for (my $startv = 0; $startv < $nverts; $startv++) | ||||||
|  | 	{ | ||||||
|  | 		my $v = $startv; | ||||||
|  |  | ||||||
|  | 		# If vertex v is of degree 1 (i.e. exactly 1 edge connects to it), | ||||||
|  | 		# remove that edge, and then consider the edge's other vertex to see | ||||||
|  | 		# if it is now of degree 1.  The inner loop repeats until reaching a | ||||||
|  | 		# vertex not of degree 1. | ||||||
|  | 		while (scalar(keys(%{ $V[$v]{edges} })) == 1) | ||||||
|  | 		{ | ||||||
|  | 			# Unlink its only edge. | ||||||
|  | 			my $e = (keys(%{ $V[$v]{edges} }))[0]; | ||||||
|  | 			delete($V[$v]{edges}->{$e}); | ||||||
|  |  | ||||||
|  | 			# Unlink the edge from its other vertex, too. | ||||||
|  | 			my $v2 = $E[$e]{left}; | ||||||
|  | 			$v2 = $E[$e]{right} if ($v2 == $v); | ||||||
|  | 			delete($V[$v2]{edges}->{$e}); | ||||||
|  |  | ||||||
|  | 			# Push e onto the front of the output-order list. | ||||||
|  | 			unshift @output_order, $e; | ||||||
|  |  | ||||||
|  | 			# Consider v2 on next iteration of inner loop. | ||||||
|  | 			$v = $v2; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	# We succeeded only if all edges were removed from the graph. | ||||||
|  | 	return () if (scalar(@output_order) != $nedges); | ||||||
|  |  | ||||||
|  | 	# OK, build the hash table of size $nverts. | ||||||
|  | 	my @hashtab = (0) x $nverts; | ||||||
|  | 	# We need a "visited" flag array in this step, too. | ||||||
|  | 	my @visited = (0) x $nverts; | ||||||
|  |  | ||||||
|  | 	# The goal is that for any key, the sum of the hash table entries for | ||||||
|  | 	# its first and second hash values is the desired output (i.e., the key | ||||||
|  | 	# number).  By assigning hash table values in the selected edge order, | ||||||
|  | 	# we can guarantee that that's true.  This works because the edge first | ||||||
|  | 	# removed from the graph (and hence last to be visited here) must have | ||||||
|  | 	# at least one vertex it shared with no other edge; hence it will have at | ||||||
|  | 	# least one vertex (hashtable entry) still unvisited when we reach it here, | ||||||
|  | 	# and we can assign that unvisited entry a value that makes the sum come | ||||||
|  | 	# out as we wish.  By induction, the same holds for all the other edges. | ||||||
|  | 	foreach my $e (@output_order) | ||||||
|  | 	{ | ||||||
|  | 		my $l = $E[$e]{left}; | ||||||
|  | 		my $r = $E[$e]{right}; | ||||||
|  | 		if (!$visited[$l]) | ||||||
|  | 		{ | ||||||
|  | 			# $hashtab[$r] might be zero, or some previously assigned value. | ||||||
|  | 			$hashtab[$l] = $e - $hashtab[$r]; | ||||||
|  | 		} | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 			die "oops, doubly used hashtab entry" if $visited[$r]; | ||||||
|  | 			# $hashtab[$l] might be zero, or some previously assigned value. | ||||||
|  | 			$hashtab[$r] = $e - $hashtab[$l]; | ||||||
|  | 		} | ||||||
|  | 		# Now freeze both of these hashtab entries. | ||||||
|  | 		$visited[$l] = 1; | ||||||
|  | 		$visited[$r] = 1; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	# Detect range of values needed in hash table. | ||||||
|  | 	my $hmin = $nedges; | ||||||
|  | 	my $hmax = 0; | ||||||
|  | 	for (my $v = 0; $v < $nverts; $v++) | ||||||
|  | 	{ | ||||||
|  | 		$hmin = $hashtab[$v] if $hashtab[$v] < $hmin; | ||||||
|  | 		$hmax = $hashtab[$v] if $hashtab[$v] > $hmax; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	# Choose width of hashtable entries.  In addition to the actual values, | ||||||
|  | 	# we need to be able to store a flag for unused entries, and we wish to | ||||||
|  | 	# have the property that adding any other entry value to the flag gives | ||||||
|  | 	# an out-of-range result (>= $nedges). | ||||||
|  | 	my $elemtype; | ||||||
|  | 	my $unused_flag; | ||||||
|  |  | ||||||
|  | 	if (   $hmin >= -0x7F | ||||||
|  | 		&& $hmax <= 0x7F | ||||||
|  | 		&& $hmin + 0x7F >= $nedges) | ||||||
|  | 	{ | ||||||
|  | 		# int8 will work | ||||||
|  | 		$elemtype    = 'int8'; | ||||||
|  | 		$unused_flag = 0x7F; | ||||||
|  | 	} | ||||||
|  | 	elsif ($hmin >= -0x7FFF | ||||||
|  | 		&& $hmax <= 0x7FFF | ||||||
|  | 		&& $hmin + 0x7FFF >= $nedges) | ||||||
|  | 	{ | ||||||
|  | 		# int16 will work | ||||||
|  | 		$elemtype    = 'int16'; | ||||||
|  | 		$unused_flag = 0x7FFF; | ||||||
|  | 	} | ||||||
|  | 	elsif ($hmin >= -0x7FFFFFFF | ||||||
|  | 		&& $hmax <= 0x7FFFFFFF | ||||||
|  | 		&& $hmin + 0x3FFFFFFF >= $nedges) | ||||||
|  | 	{ | ||||||
|  | 		# int32 will work | ||||||
|  | 		$elemtype    = 'int32'; | ||||||
|  | 		$unused_flag = 0x3FFFFFFF; | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		die "hash table values too wide"; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	# Set any unvisited hashtable entries to $unused_flag. | ||||||
|  | 	for (my $v = 0; $v < $nverts; $v++) | ||||||
|  | 	{ | ||||||
|  | 		$hashtab[$v] = $unused_flag if !$visited[$v]; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return ($elemtype, \@hashtab); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | 1; | ||||||
| @@ -14,6 +14,12 @@ | |||||||
| # variable named according to the -v switch ("ScanKeywords" by default). | # variable named according to the -v switch ("ScanKeywords" by default). | ||||||
| # The variable is marked "static" unless the -e switch is given. | # The variable is marked "static" unless the -e switch is given. | ||||||
| # | # | ||||||
|  | # ScanKeywordList uses hash-based lookup, so this script also selects | ||||||
|  | # a minimal perfect hash function for the keyword set, and emits a | ||||||
|  | # static hash function that is referenced in the ScanKeywordList struct. | ||||||
|  | # The hash function is case-insensitive unless --no-case-fold is specified. | ||||||
|  | # Note that case folding works correctly only for all-ASCII keywords! | ||||||
|  | # | ||||||
| # | # | ||||||
| # Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group | # Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group | ||||||
| # Portions Copyright (c) 1994, Regents of the University of California | # Portions Copyright (c) 1994, Regents of the University of California | ||||||
| @@ -25,14 +31,17 @@ | |||||||
| use strict; | use strict; | ||||||
| use warnings; | use warnings; | ||||||
| use Getopt::Long; | use Getopt::Long; | ||||||
|  | use PerfectHash; | ||||||
|  |  | ||||||
| my $output_path = ''; | my $output_path = ''; | ||||||
| my $extern = 0; | my $extern = 0; | ||||||
|  | my $case_fold = 1; | ||||||
| my $varname = 'ScanKeywords'; | my $varname = 'ScanKeywords'; | ||||||
|  |  | ||||||
| GetOptions( | GetOptions( | ||||||
| 	'output:s'   => \$output_path, | 	'output:s'   => \$output_path, | ||||||
| 	'extern'     => \$extern, | 	'extern'     => \$extern, | ||||||
|  | 	'case-fold!' => \$case_fold, | ||||||
| 	'varname:s'  => \$varname) || usage(); | 	'varname:s'  => \$varname) || usage(); | ||||||
|  |  | ||||||
| my $kw_input_file = shift @ARGV || die "No input file.\n"; | my $kw_input_file = shift @ARGV || die "No input file.\n"; | ||||||
| @@ -87,7 +96,22 @@ while (<$kif>) | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | # When being case-insensitive, insist that the input be all-lower-case. | ||||||
|  | if ($case_fold) | ||||||
|  | { | ||||||
|  | 	foreach my $kw (@keywords) | ||||||
|  | 	{ | ||||||
|  | 		die qq|The keyword "$kw" is not lower-case in $kw_input_file\n| | ||||||
|  | 		  if ($kw ne lc $kw); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
| # Error out if the keyword names are not in ASCII order. | # Error out if the keyword names are not in ASCII order. | ||||||
|  | # | ||||||
|  | # While this isn't really necessary with hash-based lookup, it's still | ||||||
|  | # helpful because it provides a cheap way to reject duplicate keywords. | ||||||
|  | # Also, insisting on sorted order ensures that code that scans the keyword | ||||||
|  | # table linearly will see the keywords in a canonical order. | ||||||
| for my $i (0..$#keywords - 1) | for my $i (0..$#keywords - 1) | ||||||
| { | { | ||||||
| 	die qq|The keyword "$keywords[$i + 1]" is out of order in $kw_input_file\n| | 	die qq|The keyword "$keywords[$i + 1]" is out of order in $kw_input_file\n| | ||||||
| @@ -128,15 +152,25 @@ print $kwdef "};\n\n"; | |||||||
|  |  | ||||||
| printf $kwdef "#define %s_NUM_KEYWORDS %d\n\n", uc $varname, scalar @keywords; | printf $kwdef "#define %s_NUM_KEYWORDS %d\n\n", uc $varname, scalar @keywords; | ||||||
|  |  | ||||||
|  | # Emit the definition of the hash function. | ||||||
|  |  | ||||||
|  | my $funcname = $varname . "_hash_func"; | ||||||
|  |  | ||||||
|  | my $f = PerfectHash::generate_hash_function(\@keywords, $funcname, | ||||||
|  | 	case_fold => $case_fold); | ||||||
|  |  | ||||||
|  | printf $kwdef qq|static %s\n|, $f; | ||||||
|  |  | ||||||
| # Emit the struct that wraps all this lookup info into one variable. | # Emit the struct that wraps all this lookup info into one variable. | ||||||
|  |  | ||||||
| print $kwdef "static " if !$extern; | printf $kwdef "static " if !$extern; | ||||||
| printf $kwdef "const ScanKeywordList %s = {\n", $varname; | printf $kwdef "const ScanKeywordList %s = {\n", $varname; | ||||||
| printf $kwdef qq|\t%s_kw_string,\n|, $varname; | printf $kwdef qq|\t%s_kw_string,\n|, $varname; | ||||||
| printf $kwdef qq|\t%s_kw_offsets,\n|, $varname; | printf $kwdef qq|\t%s_kw_offsets,\n|, $varname; | ||||||
|  | printf $kwdef qq|\t%s,\n|, $funcname; | ||||||
| printf $kwdef qq|\t%s_NUM_KEYWORDS,\n|, uc $varname; | printf $kwdef qq|\t%s_NUM_KEYWORDS,\n|, uc $varname; | ||||||
| printf $kwdef qq|\t%d\n|, $max_len; | printf $kwdef qq|\t%d\n|, $max_len; | ||||||
| print $kwdef "};\n\n"; | printf $kwdef "};\n\n"; | ||||||
|  |  | ||||||
| printf $kwdef "#endif\t\t\t\t\t\t\t/* %s_H */\n", uc $base_filename; | printf $kwdef "#endif\t\t\t\t\t\t\t/* %s_H */\n", uc $base_filename; | ||||||
|  |  | ||||||
| @@ -144,10 +178,11 @@ printf $kwdef "#endif\t\t\t\t\t\t\t/* %s_H */\n", uc $base_filename; | |||||||
| sub usage | sub usage | ||||||
| { | { | ||||||
| 	die <<EOM; | 	die <<EOM; | ||||||
| Usage: gen_keywordlist.pl [--output/-o <path>] [--varname/-v <varname>] [--extern/-e] input_file | Usage: gen_keywordlist.pl [--output/-o <path>] [--varname/-v <varname>] [--extern/-e] [--[no-]case-fold] input_file | ||||||
|     --output        Output directory (default '.') |     --output        Output directory (default '.') | ||||||
|     --varname       Name for ScanKeywordList variable (default 'ScanKeywords') |     --varname       Name for ScanKeywordList variable (default 'ScanKeywords') | ||||||
|     --extern        Allow the ScanKeywordList variable to be globally visible |     --extern        Allow the ScanKeywordList variable to be globally visible | ||||||
|  |     --no-case-fold  Keyword matching is to be case-sensitive | ||||||
|  |  | ||||||
| gen_keywordlist.pl transforms a list of keywords into a ScanKeywordList. | gen_keywordlist.pl transforms a list of keywords into a ScanKeywordList. | ||||||
| The output filename is derived from the input file by inserting _d, | The output filename is derived from the input file by inserting _d, | ||||||
|   | |||||||
| @@ -414,7 +414,7 @@ sub GenerateFiles | |||||||
| 			'src/include/parser/kwlist.h')) | 			'src/include/parser/kwlist.h')) | ||||||
| 	{ | 	{ | ||||||
| 		print "Generating kwlist_d.h...\n"; | 		print "Generating kwlist_d.h...\n"; | ||||||
| 		system('perl src/tools/gen_keywordlist.pl --extern -o src/common src/include/parser/kwlist.h'); | 		system('perl -I src/tools src/tools/gen_keywordlist.pl --extern -o src/common src/include/parser/kwlist.h'); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if (IsNewer( | 	if (IsNewer( | ||||||
| @@ -426,8 +426,8 @@ sub GenerateFiles | |||||||
| 	{ | 	{ | ||||||
| 		print "Generating pl_reserved_kwlist_d.h and pl_unreserved_kwlist_d.h...\n"; | 		print "Generating pl_reserved_kwlist_d.h and pl_unreserved_kwlist_d.h...\n"; | ||||||
| 		chdir('src/pl/plpgsql/src'); | 		chdir('src/pl/plpgsql/src'); | ||||||
| 		system('perl ../../../tools/gen_keywordlist.pl --varname ReservedPLKeywords pl_reserved_kwlist.h'); | 		system('perl -I ../../../tools ../../../tools/gen_keywordlist.pl --varname ReservedPLKeywords pl_reserved_kwlist.h'); | ||||||
| 		system('perl ../../../tools/gen_keywordlist.pl --varname UnreservedPLKeywords pl_unreserved_kwlist.h'); | 		system('perl -I ../../../tools ../../../tools/gen_keywordlist.pl --varname UnreservedPLKeywords pl_unreserved_kwlist.h'); | ||||||
| 		chdir('../../../..'); | 		chdir('../../../..'); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| @@ -440,8 +440,8 @@ sub GenerateFiles | |||||||
| 	{ | 	{ | ||||||
| 		print "Generating c_kwlist_d.h and ecpg_kwlist_d.h...\n"; | 		print "Generating c_kwlist_d.h and ecpg_kwlist_d.h...\n"; | ||||||
| 		chdir('src/interfaces/ecpg/preproc'); | 		chdir('src/interfaces/ecpg/preproc'); | ||||||
| 		system('perl ../../../tools/gen_keywordlist.pl --varname ScanCKeywords c_kwlist.h'); | 		system('perl -I ../../../tools ../../../tools/gen_keywordlist.pl --varname ScanCKeywords --no-case-fold c_kwlist.h'); | ||||||
| 		system('perl ../../../tools/gen_keywordlist.pl --varname ScanECPGKeywords ecpg_kwlist.h'); | 		system('perl -I ../../../tools ../../../tools/gen_keywordlist.pl --varname ScanECPGKeywords ecpg_kwlist.h'); | ||||||
| 		chdir('../../../..'); | 		chdir('../../../..'); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user