diff --git a/manifest b/manifest index 18afda4da7..147f1b1641 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\sexperimental\sand\sscary\spragma\s"writable_schema".\s(CVS\s2027) -D 2004-10-22T20:29:22 +C Tighter\sencoding\sof\sthe\skeyword\shash\stable\sin\sthe\stokenizer.\s(CVS\s2028) +D 2004-10-23T05:10:18 F Makefile.in 52c1cc106cad9148d4b7cb387b458e82dc86b339 F Makefile.linux-gcc a9e5a0d309fa7c38e7c14d3ecf7690879d3a5457 F README a01693e454a00cc117967e3f9fdab2d4d52e9bc1 @@ -70,7 +70,7 @@ F src/test2.c b11fa244fff02190707dd0879987c37c75e61fc8 F src/test3.c 5b5b0f3d11b097399c1054fff73d8f3711092301 F src/test4.c 7c6b9fc33dd1f3f93c7f1ee6e5e6d016afa6c1df F src/test5.c b001fa7f1b9e2dc5c2331de62fc641b5ab2bd7a1 -F src/tokenize.c a235e9a38ccf175f7badccdec68f58d79cb5abe5 +F src/tokenize.c f073bbf85fe527e1a8a191a1fda88313c09710ad F src/trigger.c b51a120d53e8b85359be11bf5e50854e5725fe3d F src/update.c 174c3b593b8f4928e510a51ec309e8ce69d2371c F src/utf.c f4f83acd73389090e32d6589d307fc55d794c7ed @@ -201,7 +201,7 @@ F tool/lempar.c 1e61d2b6cb9d8affa264a13336bc0c088498caa4 F tool/memleak.awk b744b6109566206c746d826f6ecdba34662216bc F tool/memleak2.awk 9cc20c8e8f3c675efac71ea0721ee6874a1566e8 F tool/memleak3.tcl 336eb50b0849dbf99b1d5462d9c37291b01b2b43 -F tool/mkkeywordhash.c 0cfab6691def993d5b2124398d25129b12b6b5fd +F tool/mkkeywordhash.c ed1f2a21a4e7f2757281c4cf7a8392c0e0ad2d5d F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e x F tool/opcodeDoc.awk b3a2a3d5d3075b8bd90b7afe24283efdd586659c F tool/report1.txt 9eae07f26a8fc53889b45fc833a66a33daa22816 @@ -252,7 +252,7 @@ F www/tclsqlite.tcl 560ecd6a916b320e59f2917317398f3d59b7cc25 F www/vdbe.tcl 59288db1ac5c0616296b26dce071c36cb611dfe9 F www/version3.tcl 092a01f5ef430d2c4acc0ae558d74c4bb89638a0 F www/whentouse.tcl fdacb0ba2d39831e8a6240d05a490026ad4c4e4c -P 0539c2d2b8e16efcbe4db3afeae9c7b426e11b05 -R 3f1f2941d330b2b80181d225c2988800 +P 39f7870a54d90d5163fcad3f08cd63699c4bb567 +R f3fcd5be76f79a1c63d8d5fffbad49dd U drh -Z 3239e241a62fc5e2facb9ebbd32227e4 +Z 91960f90f1e7a02a6b795447e3b7377d diff --git a/manifest.uuid b/manifest.uuid index fcc52e580b..836ed52f45 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -39f7870a54d90d5163fcad3f08cd63699c4bb567 \ No newline at end of file +7b9886f8d4db366bc7dbf25495f0d3b907d25689 \ No newline at end of file diff --git a/src/tokenize.c b/src/tokenize.c index c333699b6e..fee36dbf66 100644 --- a/src/tokenize.c +++ b/src/tokenize.c @@ -15,7 +15,7 @@ ** individual tokens and sends those tokens one-by-one over to the ** parser for analysis. ** -** $Id: tokenize.c,v 1.91 2004/10/07 19:03:01 drh Exp $ +** $Id: tokenize.c,v 1.92 2004/10/23 05:10:18 drh Exp $ */ #include "sqliteInt.h" #include "os.h" @@ -35,81 +35,79 @@ ** the data tables) then rerun that program to regenerate this function. */ int sqlite3KeywordCode(const char *z, int n){ - static const char zText[519] = - "ABORTAFTERALLANDASCATTACHBEFOREBEGINBETWEENBYCASCADECASECHECK" - "COLLATECOMMITCONFLICTCONSTRAINTCREATECROSSDATABASEDEFAULTDEFERRABLE" - "DEFERREDDELETEDESCDETACHDISTINCTDROPEACHELSEENDEXCEPTEXCLUSIVE" - "EXPLAINFAILFOREIGNFROMFULLGLOBGROUPHAVINGIGNOREIMMEDIATEINDEX" - "INITIALLYINNERINSERTINSTEADINTERSECTINTOISNULLJOINKEYLEFTLIKE" - "LIMITMATCHNATURALNOTNULLNULLOFFSETONORDEROUTERPRAGMAPRIMARYRAISE" - "REFERENCESREPLACERESTRICTRIGHTROLLBACKROWSELECTSETSTATEMENTTABLE" - "TEMPORARYTHENTRANSACTIONTRIGGERUNIONUNIQUEUPDATEUSINGVACUUMVALUES" - "VIEWWHENWHERE"; + static const char zText[443] = + "ABORTABLEFTEMPORARYAFTERAISELECTHENDATABASEACHECKEYANDEFAULTRANSACTION" + "ATURALIKELSEXCEPTRIGGEREFERENCESTATEMENTATTACHAVINGLOBEFOREIGN" + "OREPLACEXCLUSIVEXPLAINDEXBEGINITIALLYBETWEENOTNULLIMITBYCASCADE" + "FERRABLECASECOLLATECOMMITCONFLICTCONSTRAINTERSECTCREATECROSSDEFERRED" + "ELETEDESCDETACHDISTINCTDROPRAGMATCHFAILFROMFULLGROUPDATEIMMEDIATE" + "INNERESTRICTINSERTINSTEADINTOFFSETISNULLJOINORDERIGHTOUTEROLLBACK" + "PRIMARYROWHENUNIONUNIQUEUSINGVACUUMVALUESVIEWHERE"; static const unsigned char aHash[154] = { - 0, 75, 82, 0, 0, 97, 80, 0, 83, 0, 0, 0, 0, - 0, 0, 6, 0, 95, 4, 0, 0, 0, 0, 0, 0, 0, - 0, 96, 86, 8, 0, 26, 13, 7, 19, 15, 0, 0, 32, - 25, 0, 21, 31, 41, 0, 0, 0, 34, 27, 0, 0, 30, - 0, 0, 0, 9, 0, 10, 0, 0, 0, 0, 51, 0, 44, - 43, 0, 45, 40, 0, 29, 39, 35, 0, 0, 20, 0, 59, - 0, 16, 0, 17, 0, 18, 0, 55, 42, 72, 0, 33, 0, - 0, 61, 66, 56, 0, 0, 0, 0, 0, 0, 0, 54, 0, - 0, 0, 0, 0, 74, 50, 76, 64, 52, 0, 0, 0, 0, - 68, 84, 0, 47, 0, 58, 60, 92, 0, 0, 48, 0, 93, - 0, 63, 71, 98, 0, 0, 0, 0, 0, 67, 0, 0, 0, - 0, 87, 0, 0, 0, 0, 0, 90, 88, 0, 94, + 0, 26, 82, 0, 0, 91, 90, 0, 27, 0, 0, 0, 0, + 0, 0, 49, 0, 96, 17, 0, 0, 0, 0, 0, 0, 0, + 0, 97, 5, 31, 0, 62, 51, 28, 58, 52, 0, 0, 60, + 61, 0, 12, 41, 50, 0, 0, 0, 36, 63, 0, 0, 15, + 0, 0, 0, 39, 0, 42, 0, 0, 0, 0, 78, 0, 34, + 29, 0, 74, 71, 0, 66, 70, 37, 0, 0, 59, 0, 33, + 0, 53, 0, 54, 0, 55, 0, 83, 72, 67, 0, 24, 0, + 0, 79, 80, 84, 0, 0, 0, 0, 0, 0, 0, 75, 0, + 0, 0, 0, 0, 45, 77, 35, 44, 57, 0, 0, 0, 0, + 20, 2, 0, 38, 0, 3, 46, 93, 0, 0, 40, 0, 94, + 0, 43, 87, 98, 0, 0, 0, 0, 0, 81, 0, 0, 0, + 0, 10, 0, 0, 0, 0, 0, 92, 19, 0, 95, }; static const unsigned char aNext[98] = { - 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 12, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, - 0, 0, 0, 14, 3, 24, 0, 0, 0, 1, 22, 0, 0, - 36, 23, 28, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, - 0, 49, 37, 0, 0, 0, 38, 0, 53, 0, 57, 62, 0, - 0, 0, 0, 0, 0, 70, 46, 0, 65, 0, 0, 0, 0, - 69, 73, 0, 77, 0, 0, 0, 0, 0, 0, 81, 85, 0, - 91, 79, 78, 0, 0, 89, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, + 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, + 0, 0, 0, 0, 0, 18, 22, 0, 0, 0, 0, 0, 0, + 0, 23, 0, 16, 21, 8, 0, 32, 0, 0, 30, 0, 48, + 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, + 0, 56, 0, 1, 0, 69, 64, 0, 0, 65, 0, 0, 13, + 68, 0, 0, 76, 47, 0, 0, 0, 85, 6, 0, 89, 25, + 4, 73, 88, 86, 0, 0, 0, }; static const unsigned char aLen[98] = { - 5, 5, 3, 3, 2, 3, 6, 6, 5, 7, 2, 7, 4, - 5, 7, 6, 8, 10, 6, 5, 8, 7, 10, 8, 6, 4, - 6, 8, 4, 4, 4, 3, 6, 9, 7, 4, 3, 7, 4, - 4, 4, 5, 6, 6, 9, 2, 5, 9, 5, 6, 7, 9, - 4, 2, 6, 4, 3, 4, 4, 5, 5, 7, 3, 7, 4, - 2, 6, 2, 2, 5, 5, 6, 7, 5, 10, 7, 8, 5, - 8, 3, 6, 3, 9, 5, 4, 9, 4, 11, 7, 5, 6, - 6, 5, 6, 6, 4, 4, 5, + 5, 5, 4, 4, 9, 2, 5, 5, 6, 4, 3, 8, 2, + 4, 5, 3, 3, 7, 11, 2, 7, 4, 4, 6, 7, 10, + 9, 6, 6, 4, 6, 3, 7, 6, 7, 9, 7, 5, 5, + 9, 3, 7, 3, 7, 4, 5, 2, 7, 3, 10, 4, 7, + 6, 8, 10, 2, 9, 6, 5, 8, 6, 4, 6, 8, 2, + 4, 6, 5, 4, 4, 4, 5, 6, 9, 5, 8, 6, 7, + 4, 2, 6, 3, 6, 4, 5, 5, 5, 8, 7, 3, 4, + 5, 6, 5, 6, 6, 4, 5, }; static const unsigned short int aOffset[98] = { - 0, 5, 10, 13, 16, 16, 19, 25, 31, 36, 43, 45, 52, - 56, 61, 68, 74, 82, 92, 98, 103, 111, 118, 128, 136, 142, - 146, 152, 160, 164, 168, 172, 175, 181, 190, 197, 201, 201, 208, - 212, 216, 220, 225, 231, 237, 246, 246, 251, 260, 265, 271, 278, - 287, 291, 291, 297, 301, 304, 308, 312, 317, 322, 329, 329, 336, - 340, 340, 346, 348, 348, 353, 358, 364, 371, 376, 386, 393, 401, - 406, 414, 417, 423, 426, 435, 440, 440, 449, 453, 464, 471, 476, - 482, 488, 493, 499, 505, 509, 513, + 0, 4, 7, 10, 10, 14, 19, 23, 26, 31, 33, 35, 40, + 42, 44, 48, 51, 53, 59, 68, 69, 75, 78, 81, 86, 92, + 101, 110, 115, 120, 123, 125, 125, 129, 133, 139, 147, 152, 157, + 160, 165, 169, 175, 175, 178, 181, 186, 188, 189, 193, 203, 207, + 214, 220, 228, 235, 235, 244, 250, 255, 262, 268, 272, 278, 279, + 286, 289, 293, 298, 302, 306, 310, 313, 319, 328, 332, 340, 346, + 353, 356, 356, 359, 362, 368, 372, 376, 381, 385, 393, 400, 402, + 406, 411, 417, 422, 428, 434, 437, }; static const unsigned char aCode[98] = { - TK_ABORT, TK_AFTER, TK_ALL, TK_AND, TK_AS, - TK_ASC, TK_ATTACH, TK_BEFORE, TK_BEGIN, TK_BETWEEN, - TK_BY, TK_CASCADE, TK_CASE, TK_CHECK, TK_COLLATE, - TK_COMMIT, TK_CONFLICT, TK_CONSTRAINT, TK_CREATE, TK_JOIN_KW, - TK_DATABASE, TK_DEFAULT, TK_DEFERRABLE, TK_DEFERRED, TK_DELETE, - TK_DESC, TK_DETACH, TK_DISTINCT, TK_DROP, TK_EACH, - TK_ELSE, TK_END, TK_EXCEPT, TK_EXCLUSIVE, TK_EXPLAIN, - TK_FAIL, TK_FOR, TK_FOREIGN, TK_FROM, TK_JOIN_KW, - TK_GLOB, TK_GROUP, TK_HAVING, TK_IGNORE, TK_IMMEDIATE, - TK_IN, TK_INDEX, TK_INITIALLY, TK_JOIN_KW, TK_INSERT, - TK_INSTEAD, TK_INTERSECT, TK_INTO, TK_IS, TK_ISNULL, - TK_JOIN, TK_KEY, TK_JOIN_KW, TK_LIKE, TK_LIMIT, - TK_MATCH, TK_JOIN_KW, TK_NOT, TK_NOTNULL, TK_NULL, - TK_OF, TK_OFFSET, TK_ON, TK_OR, TK_ORDER, - TK_JOIN_KW, TK_PRAGMA, TK_PRIMARY, TK_RAISE, TK_REFERENCES, - TK_REPLACE, TK_RESTRICT, TK_JOIN_KW, TK_ROLLBACK, TK_ROW, - TK_SELECT, TK_SET, TK_STATEMENT, TK_TABLE, TK_TEMP, - TK_TEMP, TK_THEN, TK_TRANSACTION,TK_TRIGGER, TK_UNION, - TK_UNIQUE, TK_UPDATE, TK_USING, TK_VACUUM, TK_VALUES, - TK_VIEW, TK_WHEN, TK_WHERE, + TK_ABORT, TK_TABLE, TK_JOIN_KW, TK_TEMP, TK_TEMP, + TK_OR, TK_AFTER, TK_RAISE, TK_SELECT, TK_THEN, + TK_END, TK_DATABASE, TK_AS, TK_EACH, TK_CHECK, + TK_KEY, TK_AND, TK_DEFAULT, TK_TRANSACTION,TK_ON, + TK_JOIN_KW, TK_LIKE, TK_ELSE, TK_EXCEPT, TK_TRIGGER, + TK_REFERENCES, TK_STATEMENT, TK_ATTACH, TK_HAVING, TK_GLOB, + TK_BEFORE, TK_FOR, TK_FOREIGN, TK_IGNORE, TK_REPLACE, + TK_EXCLUSIVE, TK_EXPLAIN, TK_INDEX, TK_BEGIN, TK_INITIALLY, + TK_ALL, TK_BETWEEN, TK_NOT, TK_NOTNULL, TK_NULL, + TK_LIMIT, TK_BY, TK_CASCADE, TK_ASC, TK_DEFERRABLE, + TK_CASE, TK_COLLATE, TK_COMMIT, TK_CONFLICT, TK_CONSTRAINT, + TK_IN, TK_INTERSECT, TK_CREATE, TK_JOIN_KW, TK_DEFERRED, + TK_DELETE, TK_DESC, TK_DETACH, TK_DISTINCT, TK_IS, + TK_DROP, TK_PRAGMA, TK_MATCH, TK_FAIL, TK_FROM, + TK_JOIN_KW, TK_GROUP, TK_UPDATE, TK_IMMEDIATE, TK_JOIN_KW, + TK_RESTRICT, TK_INSERT, TK_INSTEAD, TK_INTO, TK_OF, + TK_OFFSET, TK_SET, TK_ISNULL, TK_JOIN, TK_ORDER, + TK_JOIN_KW, TK_JOIN_KW, TK_ROLLBACK, TK_PRIMARY, TK_ROW, + TK_WHEN, TK_UNION, TK_UNIQUE, TK_USING, TK_VACUUM, + TK_VALUES, TK_VIEW, TK_WHERE, }; int h, i; if( n<2 ) return TK_ID; @@ -124,6 +122,7 @@ int sqlite3KeywordCode(const char *z, int n){ return TK_ID; } + /* ** If X is a character that can be used in an identifier and ** X&0x80==0 then isIdChar[X] will be 1. If X&0x80==0x80 then diff --git a/tool/mkkeywordhash.c b/tool/mkkeywordhash.c index 1e51d0ebb2..6fb90446e5 100644 --- a/tool/mkkeywordhash.c +++ b/tool/mkkeywordhash.c @@ -15,10 +15,14 @@ typedef struct Keyword Keyword; struct Keyword { char *zName; /* The keyword name */ char *zTokenType; /* Token value for this keyword */ + int id; /* Unique ID for this record */ int hash; /* Hash on the keyword */ int offset; /* Offset to start of name string */ int len; /* Length of this keyword, not counting final \000 */ + int prefix; /* Number of characters in prefix */ int iNext; /* Index in aKeywordTable[] of next with same hash */ + int substrId; /* Id to another keyword this keyword is embedded in */ + int substrOffset; /* Offset into substrId for start of this keyword */ }; /* @@ -153,10 +157,37 @@ const unsigned char sqlite3UpperToLower[] = { /* ** Comparision function for two Keyword records */ -static int keywordCompare(const void *a, const void *b){ +static int keywordCompare1(const void *a, const void *b){ const Keyword *pA = (Keyword*)a; const Keyword *pB = (Keyword*)b; - return strcmp(pA->zName, pB->zName); + int n = pA->len - pB->len; + if( n==0 ){ + n = strcmp(pA->zName, pB->zName); + } + return n; +} +static int keywordCompare2(const void *a, const void *b){ + const Keyword *pA = (Keyword*)a; + const Keyword *pB = (Keyword*)b; + int n = strcmp(pA->zName, pB->zName); + return n; +} +static int keywordCompare3(const void *a, const void *b){ + const Keyword *pA = (Keyword*)a; + const Keyword *pB = (Keyword*)b; + int n = pA->offset - pB->offset; + return n; +} + +/* +** Return a KeywordTable entry with the given id +*/ +static Keyword *findById(int id){ + int i; + for(i=0; ilen = strlen(p->zName); - /* p->hash = sqlite3HashNoCase(p->zName, p->len); */ p->hash = UpperToLower[p->zName[0]]*5 + UpperToLower[p->zName[p->len-1]]*3 + p->len; - p->offset = nChar; - if( izName, aKeywordTable[i+1].zName,p->len)==0 ){ - /* This entry is a prefix of the one that follows. Do not advance - ** the offset */ - }else{ - nChar += p->len; + p->id = i+1; + } + + /* Sort the table from shortest to longest keyword */ + qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare1); + + /* Look for short keywords embedded in longer keywords */ + for(i=NKEYWORD-2; i>=0; i--){ + Keyword *p = &aKeywordTable[i]; + for(j=NKEYWORD-1; j>i && p->substrId==0; j--){ + Keyword *pOther = &aKeywordTable[j]; + if( pOther->substrId ) continue; + if( pOther->len<=p->len ) continue; + for(k=0; k<=pOther->len-p->len; k++){ + if( memcmp(p->zName, &pOther->zName[k], p->len)==0 ){ + p->substrId = pOther->id; + p->substrOffset = k; + break; + } + } } } + /* Sort the table into alphabetical order */ + qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare2); + + /* Fill in the offset for all entries */ + nChar = 0; + for(i=0; ioffset>0 || p->substrId ) continue; + p->offset = nChar; + nChar += p->len; + for(k=p->len-1; k>=1; k--){ + for(j=i+1; joffset>0 || pOther->substrId ) continue; + if( pOther->len<=k ) continue; + if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){ + p = pOther; + p->offset = nChar - k; + nChar = p->offset + p->len; + p->zName += k; + p->len -= k; + p->prefix = k; + j = i; + k = p->len; + } + } + } + } + for(i=0; isubstrId ){ + p->offset = findById(p->substrId)->offset + p->substrOffset; + } + } + + /* Sort the table by offset */ + qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare3); + /* Figure out how big to make the hash table in order to minimize the ** number of collisions */ bestSize = NKEYWORD; @@ -222,7 +300,7 @@ int main(int argc, char **argv){ printf(" static const char zText[%d] =\n", nChar+1); for(i=j=0; ioffset==aKeywordTable[i+1].offset ) continue; + if( p->substrId ) continue; if( j==0 ) printf(" \""); printf("%s", p->zName); j += p->len; @@ -260,7 +338,7 @@ int main(int argc, char **argv){ printf(" static const unsigned char aLen[%d] = {\n", NKEYWORD); for(i=j=0; i12 ){ printf("\n");