mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-05 15:55:57 +03:00
Tighter encoding of the keyword hash table in the tokenizer. (CVS 2028)
FossilOrigin-Name: 7b9886f8d4db366bc7dbf25495f0d3b907d25689
This commit is contained in:
14
manifest
14
manifest
@@ -1,5 +1,5 @@
|
|||||||
C Add\sthe\sexperimental\sand\sscary\spragma\s"writable_schema".\s(CVS\s2027)
|
C Tighter\sencoding\sof\sthe\skeyword\shash\stable\sin\sthe\stokenizer.\s(CVS\s2028)
|
||||||
D 2004-10-22T20:29:22
|
D 2004-10-23T05:10:18
|
||||||
F Makefile.in 52c1cc106cad9148d4b7cb387b458e82dc86b339
|
F Makefile.in 52c1cc106cad9148d4b7cb387b458e82dc86b339
|
||||||
F Makefile.linux-gcc a9e5a0d309fa7c38e7c14d3ecf7690879d3a5457
|
F Makefile.linux-gcc a9e5a0d309fa7c38e7c14d3ecf7690879d3a5457
|
||||||
F README a01693e454a00cc117967e3f9fdab2d4d52e9bc1
|
F README a01693e454a00cc117967e3f9fdab2d4d52e9bc1
|
||||||
@@ -70,7 +70,7 @@ F src/test2.c b11fa244fff02190707dd0879987c37c75e61fc8
|
|||||||
F src/test3.c 5b5b0f3d11b097399c1054fff73d8f3711092301
|
F src/test3.c 5b5b0f3d11b097399c1054fff73d8f3711092301
|
||||||
F src/test4.c 7c6b9fc33dd1f3f93c7f1ee6e5e6d016afa6c1df
|
F src/test4.c 7c6b9fc33dd1f3f93c7f1ee6e5e6d016afa6c1df
|
||||||
F src/test5.c b001fa7f1b9e2dc5c2331de62fc641b5ab2bd7a1
|
F src/test5.c b001fa7f1b9e2dc5c2331de62fc641b5ab2bd7a1
|
||||||
F src/tokenize.c a235e9a38ccf175f7badccdec68f58d79cb5abe5
|
F src/tokenize.c f073bbf85fe527e1a8a191a1fda88313c09710ad
|
||||||
F src/trigger.c b51a120d53e8b85359be11bf5e50854e5725fe3d
|
F src/trigger.c b51a120d53e8b85359be11bf5e50854e5725fe3d
|
||||||
F src/update.c 174c3b593b8f4928e510a51ec309e8ce69d2371c
|
F src/update.c 174c3b593b8f4928e510a51ec309e8ce69d2371c
|
||||||
F src/utf.c f4f83acd73389090e32d6589d307fc55d794c7ed
|
F src/utf.c f4f83acd73389090e32d6589d307fc55d794c7ed
|
||||||
@@ -201,7 +201,7 @@ F tool/lempar.c 1e61d2b6cb9d8affa264a13336bc0c088498caa4
|
|||||||
F tool/memleak.awk b744b6109566206c746d826f6ecdba34662216bc
|
F tool/memleak.awk b744b6109566206c746d826f6ecdba34662216bc
|
||||||
F tool/memleak2.awk 9cc20c8e8f3c675efac71ea0721ee6874a1566e8
|
F tool/memleak2.awk 9cc20c8e8f3c675efac71ea0721ee6874a1566e8
|
||||||
F tool/memleak3.tcl 336eb50b0849dbf99b1d5462d9c37291b01b2b43
|
F tool/memleak3.tcl 336eb50b0849dbf99b1d5462d9c37291b01b2b43
|
||||||
F tool/mkkeywordhash.c 0cfab6691def993d5b2124398d25129b12b6b5fd
|
F tool/mkkeywordhash.c ed1f2a21a4e7f2757281c4cf7a8392c0e0ad2d5d
|
||||||
F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e x
|
F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e x
|
||||||
F tool/opcodeDoc.awk b3a2a3d5d3075b8bd90b7afe24283efdd586659c
|
F tool/opcodeDoc.awk b3a2a3d5d3075b8bd90b7afe24283efdd586659c
|
||||||
F tool/report1.txt 9eae07f26a8fc53889b45fc833a66a33daa22816
|
F tool/report1.txt 9eae07f26a8fc53889b45fc833a66a33daa22816
|
||||||
@@ -252,7 +252,7 @@ F www/tclsqlite.tcl 560ecd6a916b320e59f2917317398f3d59b7cc25
|
|||||||
F www/vdbe.tcl 59288db1ac5c0616296b26dce071c36cb611dfe9
|
F www/vdbe.tcl 59288db1ac5c0616296b26dce071c36cb611dfe9
|
||||||
F www/version3.tcl 092a01f5ef430d2c4acc0ae558d74c4bb89638a0
|
F www/version3.tcl 092a01f5ef430d2c4acc0ae558d74c4bb89638a0
|
||||||
F www/whentouse.tcl fdacb0ba2d39831e8a6240d05a490026ad4c4e4c
|
F www/whentouse.tcl fdacb0ba2d39831e8a6240d05a490026ad4c4e4c
|
||||||
P 0539c2d2b8e16efcbe4db3afeae9c7b426e11b05
|
P 39f7870a54d90d5163fcad3f08cd63699c4bb567
|
||||||
R 3f1f2941d330b2b80181d225c2988800
|
R f3fcd5be76f79a1c63d8d5fffbad49dd
|
||||||
U drh
|
U drh
|
||||||
Z 3239e241a62fc5e2facb9ebbd32227e4
|
Z 91960f90f1e7a02a6b795447e3b7377d
|
||||||
|
@@ -1 +1 @@
|
|||||||
39f7870a54d90d5163fcad3f08cd63699c4bb567
|
7b9886f8d4db366bc7dbf25495f0d3b907d25689
|
133
src/tokenize.c
133
src/tokenize.c
@@ -15,7 +15,7 @@
|
|||||||
** individual tokens and sends those tokens one-by-one over to the
|
** individual tokens and sends those tokens one-by-one over to the
|
||||||
** parser for analysis.
|
** parser for analysis.
|
||||||
**
|
**
|
||||||
** $Id: tokenize.c,v 1.91 2004/10/07 19:03:01 drh Exp $
|
** $Id: tokenize.c,v 1.92 2004/10/23 05:10:18 drh Exp $
|
||||||
*/
|
*/
|
||||||
#include "sqliteInt.h"
|
#include "sqliteInt.h"
|
||||||
#include "os.h"
|
#include "os.h"
|
||||||
@@ -35,81 +35,79 @@
|
|||||||
** the data tables) then rerun that program to regenerate this function.
|
** the data tables) then rerun that program to regenerate this function.
|
||||||
*/
|
*/
|
||||||
int sqlite3KeywordCode(const char *z, int n){
|
int sqlite3KeywordCode(const char *z, int n){
|
||||||
static const char zText[519] =
|
static const char zText[443] =
|
||||||
"ABORTAFTERALLANDASCATTACHBEFOREBEGINBETWEENBYCASCADECASECHECK"
|
"ABORTABLEFTEMPORARYAFTERAISELECTHENDATABASEACHECKEYANDEFAULTRANSACTION"
|
||||||
"COLLATECOMMITCONFLICTCONSTRAINTCREATECROSSDATABASEDEFAULTDEFERRABLE"
|
"ATURALIKELSEXCEPTRIGGEREFERENCESTATEMENTATTACHAVINGLOBEFOREIGN"
|
||||||
"DEFERREDDELETEDESCDETACHDISTINCTDROPEACHELSEENDEXCEPTEXCLUSIVE"
|
"OREPLACEXCLUSIVEXPLAINDEXBEGINITIALLYBETWEENOTNULLIMITBYCASCADE"
|
||||||
"EXPLAINFAILFOREIGNFROMFULLGLOBGROUPHAVINGIGNOREIMMEDIATEINDEX"
|
"FERRABLECASECOLLATECOMMITCONFLICTCONSTRAINTERSECTCREATECROSSDEFERRED"
|
||||||
"INITIALLYINNERINSERTINSTEADINTERSECTINTOISNULLJOINKEYLEFTLIKE"
|
"ELETEDESCDETACHDISTINCTDROPRAGMATCHFAILFROMFULLGROUPDATEIMMEDIATE"
|
||||||
"LIMITMATCHNATURALNOTNULLNULLOFFSETONORDEROUTERPRAGMAPRIMARYRAISE"
|
"INNERESTRICTINSERTINSTEADINTOFFSETISNULLJOINORDERIGHTOUTEROLLBACK"
|
||||||
"REFERENCESREPLACERESTRICTRIGHTROLLBACKROWSELECTSETSTATEMENTTABLE"
|
"PRIMARYROWHENUNIONUNIQUEUSINGVACUUMVALUESVIEWHERE";
|
||||||
"TEMPORARYTHENTRANSACTIONTRIGGERUNIONUNIQUEUPDATEUSINGVACUUMVALUES"
|
|
||||||
"VIEWWHENWHERE";
|
|
||||||
static const unsigned char aHash[154] = {
|
static const unsigned char aHash[154] = {
|
||||||
0, 75, 82, 0, 0, 97, 80, 0, 83, 0, 0, 0, 0,
|
0, 26, 82, 0, 0, 91, 90, 0, 27, 0, 0, 0, 0,
|
||||||
0, 0, 6, 0, 95, 4, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 49, 0, 96, 17, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 96, 86, 8, 0, 26, 13, 7, 19, 15, 0, 0, 32,
|
0, 97, 5, 31, 0, 62, 51, 28, 58, 52, 0, 0, 60,
|
||||||
25, 0, 21, 31, 41, 0, 0, 0, 34, 27, 0, 0, 30,
|
61, 0, 12, 41, 50, 0, 0, 0, 36, 63, 0, 0, 15,
|
||||||
0, 0, 0, 9, 0, 10, 0, 0, 0, 0, 51, 0, 44,
|
0, 0, 0, 39, 0, 42, 0, 0, 0, 0, 78, 0, 34,
|
||||||
43, 0, 45, 40, 0, 29, 39, 35, 0, 0, 20, 0, 59,
|
29, 0, 74, 71, 0, 66, 70, 37, 0, 0, 59, 0, 33,
|
||||||
0, 16, 0, 17, 0, 18, 0, 55, 42, 72, 0, 33, 0,
|
0, 53, 0, 54, 0, 55, 0, 83, 72, 67, 0, 24, 0,
|
||||||
0, 61, 66, 56, 0, 0, 0, 0, 0, 0, 0, 54, 0,
|
0, 79, 80, 84, 0, 0, 0, 0, 0, 0, 0, 75, 0,
|
||||||
0, 0, 0, 0, 74, 50, 76, 64, 52, 0, 0, 0, 0,
|
0, 0, 0, 0, 45, 77, 35, 44, 57, 0, 0, 0, 0,
|
||||||
68, 84, 0, 47, 0, 58, 60, 92, 0, 0, 48, 0, 93,
|
20, 2, 0, 38, 0, 3, 46, 93, 0, 0, 40, 0, 94,
|
||||||
0, 63, 71, 98, 0, 0, 0, 0, 0, 67, 0, 0, 0,
|
0, 43, 87, 98, 0, 0, 0, 0, 0, 81, 0, 0, 0,
|
||||||
0, 87, 0, 0, 0, 0, 0, 90, 88, 0, 94,
|
0, 10, 0, 0, 0, 0, 0, 92, 19, 0, 95,
|
||||||
};
|
};
|
||||||
static const unsigned char aNext[98] = {
|
static const unsigned char aNext[98] = {
|
||||||
0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,
|
||||||
0, 12, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0,
|
0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0,
|
||||||
0, 0, 0, 14, 3, 24, 0, 0, 0, 1, 22, 0, 0,
|
0, 0, 0, 0, 0, 18, 22, 0, 0, 0, 0, 0, 0,
|
||||||
36, 23, 28, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0,
|
0, 23, 0, 16, 21, 8, 0, 32, 0, 0, 30, 0, 48,
|
||||||
0, 49, 37, 0, 0, 0, 38, 0, 53, 0, 57, 62, 0,
|
0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 70, 46, 0, 65, 0, 0, 0, 0,
|
0, 56, 0, 1, 0, 69, 64, 0, 0, 65, 0, 0, 13,
|
||||||
69, 73, 0, 77, 0, 0, 0, 0, 0, 0, 81, 85, 0,
|
68, 0, 0, 76, 47, 0, 0, 0, 85, 6, 0, 89, 25,
|
||||||
91, 79, 78, 0, 0, 89, 0,
|
4, 73, 88, 86, 0, 0, 0,
|
||||||
};
|
};
|
||||||
static const unsigned char aLen[98] = {
|
static const unsigned char aLen[98] = {
|
||||||
5, 5, 3, 3, 2, 3, 6, 6, 5, 7, 2, 7, 4,
|
5, 5, 4, 4, 9, 2, 5, 5, 6, 4, 3, 8, 2,
|
||||||
5, 7, 6, 8, 10, 6, 5, 8, 7, 10, 8, 6, 4,
|
4, 5, 3, 3, 7, 11, 2, 7, 4, 4, 6, 7, 10,
|
||||||
6, 8, 4, 4, 4, 3, 6, 9, 7, 4, 3, 7, 4,
|
9, 6, 6, 4, 6, 3, 7, 6, 7, 9, 7, 5, 5,
|
||||||
4, 4, 5, 6, 6, 9, 2, 5, 9, 5, 6, 7, 9,
|
9, 3, 7, 3, 7, 4, 5, 2, 7, 3, 10, 4, 7,
|
||||||
4, 2, 6, 4, 3, 4, 4, 5, 5, 7, 3, 7, 4,
|
6, 8, 10, 2, 9, 6, 5, 8, 6, 4, 6, 8, 2,
|
||||||
2, 6, 2, 2, 5, 5, 6, 7, 5, 10, 7, 8, 5,
|
4, 6, 5, 4, 4, 4, 5, 6, 9, 5, 8, 6, 7,
|
||||||
8, 3, 6, 3, 9, 5, 4, 9, 4, 11, 7, 5, 6,
|
4, 2, 6, 3, 6, 4, 5, 5, 5, 8, 7, 3, 4,
|
||||||
6, 5, 6, 6, 4, 4, 5,
|
5, 6, 5, 6, 6, 4, 5,
|
||||||
};
|
};
|
||||||
static const unsigned short int aOffset[98] = {
|
static const unsigned short int aOffset[98] = {
|
||||||
0, 5, 10, 13, 16, 16, 19, 25, 31, 36, 43, 45, 52,
|
0, 4, 7, 10, 10, 14, 19, 23, 26, 31, 33, 35, 40,
|
||||||
56, 61, 68, 74, 82, 92, 98, 103, 111, 118, 128, 136, 142,
|
42, 44, 48, 51, 53, 59, 68, 69, 75, 78, 81, 86, 92,
|
||||||
146, 152, 160, 164, 168, 172, 175, 181, 190, 197, 201, 201, 208,
|
101, 110, 115, 120, 123, 125, 125, 129, 133, 139, 147, 152, 157,
|
||||||
212, 216, 220, 225, 231, 237, 246, 246, 251, 260, 265, 271, 278,
|
160, 165, 169, 175, 175, 178, 181, 186, 188, 189, 193, 203, 207,
|
||||||
287, 291, 291, 297, 301, 304, 308, 312, 317, 322, 329, 329, 336,
|
214, 220, 228, 235, 235, 244, 250, 255, 262, 268, 272, 278, 279,
|
||||||
340, 340, 346, 348, 348, 353, 358, 364, 371, 376, 386, 393, 401,
|
286, 289, 293, 298, 302, 306, 310, 313, 319, 328, 332, 340, 346,
|
||||||
406, 414, 417, 423, 426, 435, 440, 440, 449, 453, 464, 471, 476,
|
353, 356, 356, 359, 362, 368, 372, 376, 381, 385, 393, 400, 402,
|
||||||
482, 488, 493, 499, 505, 509, 513,
|
406, 411, 417, 422, 428, 434, 437,
|
||||||
};
|
};
|
||||||
static const unsigned char aCode[98] = {
|
static const unsigned char aCode[98] = {
|
||||||
TK_ABORT, TK_AFTER, TK_ALL, TK_AND, TK_AS,
|
TK_ABORT, TK_TABLE, TK_JOIN_KW, TK_TEMP, TK_TEMP,
|
||||||
TK_ASC, TK_ATTACH, TK_BEFORE, TK_BEGIN, TK_BETWEEN,
|
TK_OR, TK_AFTER, TK_RAISE, TK_SELECT, TK_THEN,
|
||||||
TK_BY, TK_CASCADE, TK_CASE, TK_CHECK, TK_COLLATE,
|
TK_END, TK_DATABASE, TK_AS, TK_EACH, TK_CHECK,
|
||||||
TK_COMMIT, TK_CONFLICT, TK_CONSTRAINT, TK_CREATE, TK_JOIN_KW,
|
TK_KEY, TK_AND, TK_DEFAULT, TK_TRANSACTION,TK_ON,
|
||||||
TK_DATABASE, TK_DEFAULT, TK_DEFERRABLE, TK_DEFERRED, TK_DELETE,
|
TK_JOIN_KW, TK_LIKE, TK_ELSE, TK_EXCEPT, TK_TRIGGER,
|
||||||
TK_DESC, TK_DETACH, TK_DISTINCT, TK_DROP, TK_EACH,
|
TK_REFERENCES, TK_STATEMENT, TK_ATTACH, TK_HAVING, TK_GLOB,
|
||||||
TK_ELSE, TK_END, TK_EXCEPT, TK_EXCLUSIVE, TK_EXPLAIN,
|
TK_BEFORE, TK_FOR, TK_FOREIGN, TK_IGNORE, TK_REPLACE,
|
||||||
TK_FAIL, TK_FOR, TK_FOREIGN, TK_FROM, TK_JOIN_KW,
|
TK_EXCLUSIVE, TK_EXPLAIN, TK_INDEX, TK_BEGIN, TK_INITIALLY,
|
||||||
TK_GLOB, TK_GROUP, TK_HAVING, TK_IGNORE, TK_IMMEDIATE,
|
TK_ALL, TK_BETWEEN, TK_NOT, TK_NOTNULL, TK_NULL,
|
||||||
TK_IN, TK_INDEX, TK_INITIALLY, TK_JOIN_KW, TK_INSERT,
|
TK_LIMIT, TK_BY, TK_CASCADE, TK_ASC, TK_DEFERRABLE,
|
||||||
TK_INSTEAD, TK_INTERSECT, TK_INTO, TK_IS, TK_ISNULL,
|
TK_CASE, TK_COLLATE, TK_COMMIT, TK_CONFLICT, TK_CONSTRAINT,
|
||||||
TK_JOIN, TK_KEY, TK_JOIN_KW, TK_LIKE, TK_LIMIT,
|
TK_IN, TK_INTERSECT, TK_CREATE, TK_JOIN_KW, TK_DEFERRED,
|
||||||
TK_MATCH, TK_JOIN_KW, TK_NOT, TK_NOTNULL, TK_NULL,
|
TK_DELETE, TK_DESC, TK_DETACH, TK_DISTINCT, TK_IS,
|
||||||
TK_OF, TK_OFFSET, TK_ON, TK_OR, TK_ORDER,
|
TK_DROP, TK_PRAGMA, TK_MATCH, TK_FAIL, TK_FROM,
|
||||||
TK_JOIN_KW, TK_PRAGMA, TK_PRIMARY, TK_RAISE, TK_REFERENCES,
|
TK_JOIN_KW, TK_GROUP, TK_UPDATE, TK_IMMEDIATE, TK_JOIN_KW,
|
||||||
TK_REPLACE, TK_RESTRICT, TK_JOIN_KW, TK_ROLLBACK, TK_ROW,
|
TK_RESTRICT, TK_INSERT, TK_INSTEAD, TK_INTO, TK_OF,
|
||||||
TK_SELECT, TK_SET, TK_STATEMENT, TK_TABLE, TK_TEMP,
|
TK_OFFSET, TK_SET, TK_ISNULL, TK_JOIN, TK_ORDER,
|
||||||
TK_TEMP, TK_THEN, TK_TRANSACTION,TK_TRIGGER, TK_UNION,
|
TK_JOIN_KW, TK_JOIN_KW, TK_ROLLBACK, TK_PRIMARY, TK_ROW,
|
||||||
TK_UNIQUE, TK_UPDATE, TK_USING, TK_VACUUM, TK_VALUES,
|
TK_WHEN, TK_UNION, TK_UNIQUE, TK_USING, TK_VACUUM,
|
||||||
TK_VIEW, TK_WHEN, TK_WHERE,
|
TK_VALUES, TK_VIEW, TK_WHERE,
|
||||||
};
|
};
|
||||||
int h, i;
|
int h, i;
|
||||||
if( n<2 ) return TK_ID;
|
if( n<2 ) return TK_ID;
|
||||||
@@ -124,6 +122,7 @@ int sqlite3KeywordCode(const char *z, int n){
|
|||||||
return TK_ID;
|
return TK_ID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** If X is a character that can be used in an identifier and
|
** If X is a character that can be used in an identifier and
|
||||||
** X&0x80==0 then isIdChar[X] will be 1. If X&0x80==0x80 then
|
** X&0x80==0 then isIdChar[X] will be 1. If X&0x80==0x80 then
|
||||||
|
@@ -15,10 +15,14 @@ typedef struct Keyword Keyword;
|
|||||||
struct Keyword {
  char *zName;       /* Text of the keyword */
  char *zTokenType;  /* Token value for this keyword */
  int id;            /* Unique ID for this record; main() assigns 1, 2, 3... */
  int hash;          /* Hash computed from first char, last char and length */
  int offset;        /* Offset to start of name string */
  int len;           /* Length of zName, not counting final \000 */
  int prefix;        /* Number of leading characters overlapped with the
                     ** text that precedes this keyword (0 if none) */
  int iNext;         /* Index in aKeywordTable[] of next with same hash */
  int substrId;      /* id of a longer keyword that contains this one,
                     ** or 0 if this keyword is not embedded in another */
  int substrOffset;  /* Offset within keyword substrId where this one starts */
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -153,10 +157,37 @@ const unsigned char sqlite3UpperToLower[] = {
|
|||||||
/*
|
/*
|
||||||
** Comparision function for two Keyword records
|
** Comparision function for two Keyword records
|
||||||
*/
|
*/
|
||||||
static int keywordCompare(const void *a, const void *b){
|
static int keywordCompare1(const void *a, const void *b){
|
||||||
const Keyword *pA = (Keyword*)a;
|
const Keyword *pA = (Keyword*)a;
|
||||||
const Keyword *pB = (Keyword*)b;
|
const Keyword *pB = (Keyword*)b;
|
||||||
return strcmp(pA->zName, pB->zName);
|
int n = pA->len - pB->len;
|
||||||
|
if( n==0 ){
|
||||||
|
n = strcmp(pA->zName, pB->zName);
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
static int keywordCompare2(const void *a, const void *b){
|
||||||
|
const Keyword *pA = (Keyword*)a;
|
||||||
|
const Keyword *pB = (Keyword*)b;
|
||||||
|
int n = strcmp(pA->zName, pB->zName);
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
static int keywordCompare3(const void *a, const void *b){
|
||||||
|
const Keyword *pA = (Keyword*)a;
|
||||||
|
const Keyword *pB = (Keyword*)b;
|
||||||
|
int n = pA->offset - pB->offset;
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
** Return a KeywordTable entry with the given id
|
||||||
|
*/
|
||||||
|
static Keyword *findById(int id){
|
||||||
|
int i;
|
||||||
|
for(i=0; i<NKEYWORD; i++){
|
||||||
|
if( aKeywordTable[i].id==id ) break;
|
||||||
|
}
|
||||||
|
return &aKeywordTable[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -164,31 +195,78 @@ static int keywordCompare(const void *a, const void *b){
|
|||||||
** output.
|
** output.
|
||||||
*/
|
*/
|
||||||
int main(int argc, char **argv){
|
int main(int argc, char **argv){
|
||||||
int i, j, h;
|
int i, j, k, h;
|
||||||
int bestSize, bestCount;
|
int bestSize, bestCount;
|
||||||
int count;
|
int count;
|
||||||
int nChar;
|
int nChar;
|
||||||
int aHash[1000]; /* 1000 is much bigger than NKEYWORD */
|
int aHash[1000]; /* 1000 is much bigger than NKEYWORD */
|
||||||
|
|
||||||
/* Make sure the table is sorted */
|
/* Fill in the lengths of strings and hashes for all entries. */
|
||||||
qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare);
|
|
||||||
|
|
||||||
/* Fill in the hash value, length, and offset for all entries */
|
|
||||||
nChar = 0;
|
|
||||||
for(i=0; i<NKEYWORD; i++){
|
for(i=0; i<NKEYWORD; i++){
|
||||||
Keyword *p = &aKeywordTable[i];
|
Keyword *p = &aKeywordTable[i];
|
||||||
p->len = strlen(p->zName);
|
p->len = strlen(p->zName);
|
||||||
/* p->hash = sqlite3HashNoCase(p->zName, p->len); */
|
|
||||||
p->hash = UpperToLower[p->zName[0]]*5 +
|
p->hash = UpperToLower[p->zName[0]]*5 +
|
||||||
UpperToLower[p->zName[p->len-1]]*3 + p->len;
|
UpperToLower[p->zName[p->len-1]]*3 + p->len;
|
||||||
|
p->id = i+1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sort the table from shortest to longest keyword */
|
||||||
|
qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare1);
|
||||||
|
|
||||||
|
/* Look for short keywords embedded in longer keywords */
|
||||||
|
for(i=NKEYWORD-2; i>=0; i--){
|
||||||
|
Keyword *p = &aKeywordTable[i];
|
||||||
|
for(j=NKEYWORD-1; j>i && p->substrId==0; j--){
|
||||||
|
Keyword *pOther = &aKeywordTable[j];
|
||||||
|
if( pOther->substrId ) continue;
|
||||||
|
if( pOther->len<=p->len ) continue;
|
||||||
|
for(k=0; k<=pOther->len-p->len; k++){
|
||||||
|
if( memcmp(p->zName, &pOther->zName[k], p->len)==0 ){
|
||||||
|
p->substrId = pOther->id;
|
||||||
|
p->substrOffset = k;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sort the table into alphabetical order */
|
||||||
|
qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare2);
|
||||||
|
|
||||||
|
/* Fill in the offset for all entries */
|
||||||
|
nChar = 0;
|
||||||
|
for(i=0; i<NKEYWORD; i++){
|
||||||
|
Keyword *p = &aKeywordTable[i];
|
||||||
|
if( p->offset>0 || p->substrId ) continue;
|
||||||
p->offset = nChar;
|
p->offset = nChar;
|
||||||
if( i<NKEYWORD-1 && strncmp(p->zName, aKeywordTable[i+1].zName,p->len)==0 ){
|
|
||||||
/* This entry is a prefix of the one that follows. Do not advance
|
|
||||||
** the offset */
|
|
||||||
}else{
|
|
||||||
nChar += p->len;
|
nChar += p->len;
|
||||||
|
for(k=p->len-1; k>=1; k--){
|
||||||
|
for(j=i+1; j<NKEYWORD; j++){
|
||||||
|
Keyword *pOther = &aKeywordTable[j];
|
||||||
|
if( pOther->offset>0 || pOther->substrId ) continue;
|
||||||
|
if( pOther->len<=k ) continue;
|
||||||
|
if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
|
||||||
|
p = pOther;
|
||||||
|
p->offset = nChar - k;
|
||||||
|
nChar = p->offset + p->len;
|
||||||
|
p->zName += k;
|
||||||
|
p->len -= k;
|
||||||
|
p->prefix = k;
|
||||||
|
j = i;
|
||||||
|
k = p->len;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(i=0; i<NKEYWORD; i++){
|
||||||
|
Keyword *p = &aKeywordTable[i];
|
||||||
|
if( p->substrId ){
|
||||||
|
p->offset = findById(p->substrId)->offset + p->substrOffset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sort the table by offset */
|
||||||
|
qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare3);
|
||||||
|
|
||||||
/* Figure out how big to make the hash table in order to minimize the
|
/* Figure out how big to make the hash table in order to minimize the
|
||||||
** number of collisions */
|
** number of collisions */
|
||||||
@@ -222,7 +300,7 @@ int main(int argc, char **argv){
|
|||||||
printf(" static const char zText[%d] =\n", nChar+1);
|
printf(" static const char zText[%d] =\n", nChar+1);
|
||||||
for(i=j=0; i<NKEYWORD; i++){
|
for(i=j=0; i<NKEYWORD; i++){
|
||||||
Keyword *p = &aKeywordTable[i];
|
Keyword *p = &aKeywordTable[i];
|
||||||
if( i<NKEYWORD-1 && p->offset==aKeywordTable[i+1].offset ) continue;
|
if( p->substrId ) continue;
|
||||||
if( j==0 ) printf(" \"");
|
if( j==0 ) printf(" \"");
|
||||||
printf("%s", p->zName);
|
printf("%s", p->zName);
|
||||||
j += p->len;
|
j += p->len;
|
||||||
@@ -260,7 +338,7 @@ int main(int argc, char **argv){
|
|||||||
printf(" static const unsigned char aLen[%d] = {\n", NKEYWORD);
|
printf(" static const unsigned char aLen[%d] = {\n", NKEYWORD);
|
||||||
for(i=j=0; i<NKEYWORD; i++){
|
for(i=j=0; i<NKEYWORD; i++){
|
||||||
if( j==0 ) printf(" ");
|
if( j==0 ) printf(" ");
|
||||||
printf(" %3d,", aKeywordTable[i].len);
|
printf(" %3d,", aKeywordTable[i].len+aKeywordTable[i].prefix);
|
||||||
j++;
|
j++;
|
||||||
if( j>12 ){
|
if( j>12 ){
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
Reference in New Issue
Block a user