diff --git a/Makefile.in b/Makefile.in index d2f9710059..028f615cd7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1306,6 +1306,9 @@ showwal$(TEXE): $(TOP)/tool/showwal.c sqlite3.lo showshm$(TEXE): $(TOP)/tool/showshm.c $(LTLINK) -o $@ $(TOP)/tool/showshm.c +index_usage$(TEXE): $(TOP)/tool/index_usage.c sqlite3.lo + $(LTLINK) -o $@ $(TOP)/tool/index_usage.c sqlite3.lo $(TLIBS) + changeset$(TEXE): $(TOP)/ext/session/changeset.c sqlite3.lo $(LTLINK) -o $@ $(TOP)/ext/session/changeset.c sqlite3.lo $(TLIBS) diff --git a/Makefile.msc b/Makefile.msc index e3feffab35..3b95d088d2 100644 --- a/Makefile.msc +++ b/Makefile.msc @@ -2444,6 +2444,10 @@ showwal.exe: $(TOP)\tool\showwal.c $(SQLITE3C) $(SQLITE3H) showshm.exe: $(TOP)\tool\showshm.c $(LTLINK) $(NO_WARN) $(TOP)\tool\showshm.c /link $(LDFLAGS) $(LTLINKOPTS) +index_usage.exe: $(TOP)\tool\index_usage.c $(SQLITE3C) $(SQLITE3H) + $(LTLINK) $(NO_WARN) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION \ + $(TOP)\tool\index_usage.c $(SQLITE3C) /link $(LDFLAGS) $(LTLINKOPTS) + changeset.exe: $(TOP)\ext\session\changeset.c $(SQLITE3C) $(SQLITE3H) $(LTLINK) $(NO_WARN) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION \ -DSQLITE_ENABLE_SESSION=1 -DSQLITE_ENABLE_PREUPDATE_HOOK=1 \ diff --git a/autoconf/Makefile.msc b/autoconf/Makefile.msc index 270c83c230..a131d4c6e7 100644 --- a/autoconf/Makefile.msc +++ b/autoconf/Makefile.msc @@ -283,6 +283,7 @@ OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_STMTVTAB=1 OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_DBPAGE_VTAB=1 OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_DBSTAT_VTAB=1 OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_INTROSPECTION_PRAGMAS=1 +OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_DESERIALIZE=1 !ENDIF OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_COLUMN_METADATA=1 !ENDIF @@ -937,6 +938,7 @@ LIBRESOBJS = SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_FTS4=1 SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) 
-DSQLITE_ENABLE_EXPLAIN_COMMENTS=1 SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_OFFSET_SQL_FUNC=1 +SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_DESERIALIZE=1 !ENDIF diff --git a/ext/fts3/fts3_unicode.c b/ext/fts3/fts3_unicode.c index dfb2680c50..c02ea3990c 100644 --- a/ext/fts3/fts3_unicode.c +++ b/ext/fts3/fts3_unicode.c @@ -82,7 +82,7 @@ typedef struct unicode_cursor unicode_cursor; struct unicode_tokenizer { sqlite3_tokenizer base; - int bRemoveDiacritic; + int eRemoveDiacritic; int nException; int *aiException; }; @@ -227,17 +227,20 @@ static int unicodeCreate( pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer)); if( pNew==NULL ) return SQLITE_NOMEM; memset(pNew, 0, sizeof(unicode_tokenizer)); - pNew->bRemoveDiacritic = 1; + pNew->eRemoveDiacritic = 1; for(i=0; rc==SQLITE_OK && ibRemoveDiacritic = 1; + pNew->eRemoveDiacritic = 1; } else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){ - pNew->bRemoveDiacritic = 0; + pNew->eRemoveDiacritic = 0; + } + else if( n==19 && memcmp("remove_diacritics=2", z, 19)==0 ){ + pNew->eRemoveDiacritic = 2; } else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){ rc = unicodeAddExceptions(pNew, 1, &z[11], n-11); @@ -350,7 +353,7 @@ static int unicodeNext( /* Write the folded case of the last character read to the output */ zEnd = z; - iOut = sqlite3FtsUnicodeFold((int)iCode, p->bRemoveDiacritic); + iOut = sqlite3FtsUnicodeFold((int)iCode, p->eRemoveDiacritic); if( iOut ){ WRITE_UTF8(zOut, iOut); } diff --git a/ext/fts3/fts3_unicode2.c b/ext/fts3/fts3_unicode2.c index da7251ed0c..41027b2546 100644 --- a/ext/fts3/fts3_unicode2.c +++ b/ext/fts3/fts3_unicode2.c @@ -159,32 +159,47 @@ int sqlite3FtsUnicodeIsalnum(int c){ ** E"). The resuls of passing a codepoint that corresponds to an ** uppercase letter are undefined. 
*/ -static int remove_diacritic(int c){ +static int remove_diacritic(int c, int bComplex){ unsigned short aDia[] = { 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, - 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, - 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, - 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, - 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, - 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, - 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, - 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, - 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, - 62924, 63050, 63082, 63274, 63390, + 3456, 3696, 3712, 3728, 3744, 3766, 3832, 3896, + 3912, 3928, 3944, 3968, 4008, 4040, 4056, 4106, + 4138, 4170, 4202, 4234, 4266, 4296, 4312, 4344, + 4408, 4424, 4442, 4472, 4488, 4504, 6148, 6198, + 6264, 6280, 6360, 6429, 6505, 6529, 61448, 61468, + 61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704, + 61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914, + 61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218, + 62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554, + 62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766, + 62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118, + 63182, 63242, 63274, 63310, 63368, 63390, }; char aChar[] = { - '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c', - 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r', - 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o', - 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r', - 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0', - '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h', - 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't', - 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', - 'e', 'i', 'o', 'u', 'y', + '\0', 'a'|0x00, 'c'|0x00, 
'e'|0x00, 'i'|0x00, 'n'|0x00, + 'o'|0x00, 'u'|0x00, 'y'|0x00, 'y'|0x00, 'a'|0x00, 'c'|0x00, + 'd'|0x00, 'e'|0x00, 'e'|0x00, 'g'|0x00, 'h'|0x00, 'i'|0x00, + 'j'|0x00, 'k'|0x00, 'l'|0x00, 'n'|0x00, 'o'|0x00, 'r'|0x00, + 's'|0x00, 't'|0x00, 'u'|0x00, 'u'|0x00, 'w'|0x00, 'y'|0x00, + 'z'|0x00, 'o'|0x00, 'u'|0x00, 'a'|0x00, 'i'|0x00, 'o'|0x00, + 'u'|0x00, 'u'|0x80, 'a'|0x80, 'g'|0x00, 'k'|0x00, 'o'|0x00, + 'o'|0x80, 'j'|0x00, 'g'|0x00, 'n'|0x00, 'a'|0x80, 'a'|0x00, + 'e'|0x00, 'i'|0x00, 'o'|0x00, 'r'|0x00, 'u'|0x00, 's'|0x00, + 't'|0x00, 'h'|0x00, 'a'|0x00, 'e'|0x00, 'o'|0x80, 'o'|0x00, + 'o'|0x80, 'y'|0x00, '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', 'a'|0x00, 'b'|0x00, + 'c'|0x80, 'd'|0x00, 'd'|0x00, 'e'|0x80, 'e'|0x00, 'e'|0x80, + 'f'|0x00, 'g'|0x00, 'h'|0x00, 'h'|0x00, 'i'|0x00, 'i'|0x80, + 'k'|0x00, 'l'|0x00, 'l'|0x80, 'l'|0x00, 'm'|0x00, 'n'|0x00, + 'o'|0x80, 'p'|0x00, 'r'|0x00, 'r'|0x80, 'r'|0x00, 's'|0x00, + 's'|0x80, 't'|0x00, 'u'|0x00, 'u'|0x80, 'v'|0x00, 'w'|0x00, + 'w'|0x00, 'x'|0x00, 'y'|0x00, 'z'|0x00, 'h'|0x00, 't'|0x00, + 'w'|0x00, 'y'|0x00, 'a'|0x00, 'a'|0x80, 'a'|0x80, 'a'|0x80, + 'e'|0x00, 'e'|0x80, 'e'|0x80, 'i'|0x00, 'o'|0x00, 'o'|0x80, + 'o'|0x80, 'o'|0x80, 'u'|0x00, 'u'|0x80, 'u'|0x80, 'y'|0x00, }; unsigned int key = (((unsigned int)c)<<3) | 0x00000007; @@ -201,7 +216,8 @@ static int remove_diacritic(int c){ } } assert( key>=aDia[iRes] ); - return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); + if( bComplex==0 && (aChar[iRes] & 0x80) ) return c; + return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F); } @@ -228,7 +244,7 @@ int sqlite3FtsUnicodeIsdiacritic(int c){ ** The results are undefined if the value passed to this function ** is less than zero. */ -int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){ +int sqlite3FtsUnicodeFold(int c, int eRemoveDiacritic){ /* Each entry in the following array defines a rule for folding a range ** of codepoints to lower case. 
The rule applies to a range of nRange ** codepoints starting at codepoint iCode. @@ -351,7 +367,9 @@ int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){ assert( ret>0 ); } - if( bRemoveDiacritic ) ret = remove_diacritic(ret); + if( eRemoveDiacritic ){ + ret = remove_diacritic(ret, eRemoveDiacritic==2); + } } else if( c>=66560 && c<66600 ){ diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl index 84b8ddc80b..8465262033 100644 --- a/ext/fts3/unicode/mkunicode.tcl +++ b/ext/fts3/unicode/mkunicode.tcl @@ -9,11 +9,12 @@ proc print_rd {map} { set nRange 1 set iFirst [lindex $map 0 0] set cPrev [lindex $map 0 1] + set fPrev [lindex $map 0 2] foreach m [lrange $map 1 end] { - foreach {i c} $m {} + foreach {i c f} $m {} - if {$cPrev == $c} { + if {$cPrev == $c && $fPrev==$f} { for {set j [expr $iFirst+$nRange]} {$j<$i} {incr j} { if {[info exists tl_lookup_table($j)]==0} break } @@ -29,13 +30,16 @@ proc print_rd {map} { lappend lRange [list $iFirst $nRange] lappend aChar $cPrev + lappend aFlag $fPrev set iFirst $i set cPrev $c + set fPrev $f set nRange 1 } lappend lRange [list $iFirst $nRange] lappend aChar $cPrev + lappend aFlag $fPrev puts "/*" puts "** If the argument is a codepoint corresponding to a lowercase letter" @@ -45,7 +49,7 @@ proc print_rd {map} { puts "** E\"). The resuls of passing a codepoint that corresponds to an" puts "** uppercase letter are undefined." 
puts "*/" - puts "static int ${::remove_diacritic}(int c)\{" + puts "static int ${::remove_diacritic}(int c, int bComplex)\{" puts " unsigned short aDia\[\] = \{" puts -nonewline " 0, " set i 1 @@ -60,13 +64,17 @@ proc print_rd {map} { puts "" puts " \};" puts " char aChar\[\] = \{" - puts -nonewline " '\\0', " + puts -nonewline " '\\0', " set i 1 - foreach c $aChar { - set str "'$c', " - if {$c == ""} { set str "'\\0', " } + foreach c $aChar f $aFlag { + if { $f } { + set str "'$c'|0x80, " + } else { + set str "'$c'|0x00, " + } + if {$c == ""} { set str "'\\0', " } - if {($i % 12)==0} {puts "" ; puts -nonewline " " } + if {($i % 6)==0} {puts "" ; puts -nonewline " " } incr i puts -nonewline "$str" } @@ -87,7 +95,8 @@ proc print_rd {map} { } } assert( key>=aDia[iRes] ); - return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);} + if( bComplex==0 && (aChar[iRes] & 0x80) ) return c; + return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);} puts "\}" } @@ -95,7 +104,8 @@ proc print_isdiacritic {zFunc map} { set lCode [list] foreach m $map { - foreach {code char} $m {} + foreach {code char flag} $m {} + if {$flag} continue if {$code && $char == ""} { lappend lCode $code } } set lCode [lsort -integer $lCode] @@ -472,7 +482,7 @@ proc print_fold {zFunc} { puts "** The results are undefined if the value passed to this function" puts "** is less than zero." 
puts "*/" - puts "int ${zFunc}\(int c, int bRemoveDiacritic)\{" + puts "int ${zFunc}\(int c, int eRemoveDiacritic)\{" set liOff [tl_generate_ioff_table $lRecord] tl_print_table_header @@ -516,7 +526,9 @@ proc print_fold {zFunc} { assert( ret>0 ); } - if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret); + if( eRemoveDiacritic ){ + ret = ${::remove_diacritic}(ret, eRemoveDiacritic==2); + } } }] @@ -605,10 +617,6 @@ proc print_categories {lMap} { set nCat [expr [llength [array names C]] + 1] puts [code { - int sqlite3Fts5UnicodeNCat(void) { - return $nCat; - } - int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ aArray[0] = 1; switch( zCat[0] ){ diff --git a/ext/fts3/unicode/parseunicode.tcl b/ext/fts3/unicode/parseunicode.tcl index 966d7bdd3a..7c246a4a09 100644 --- a/ext/fts3/unicode/parseunicode.tcl +++ b/ext/fts3/unicode/parseunicode.tcl @@ -7,12 +7,24 @@ # character that it should be replaced with, or an empty string if the # codepoint should simply be removed from the input. Examples: # -# { 224 a } (replace codepoint 224 to "a") -# { 769 "" } (remove codepoint 769 from input) +# { 224 a 0 } (replace codepoint 224 to "a") +# { 769 "" 0 } (remove codepoint 769 from input) # # Mappings are only returned for non-upper case codepoints. It is assumed # that the input has already been folded to lower case. # +# The third value in the list is always either 0 or 1. 0 if the +# UnicodeData.txt file maps the codepoint to a single ASCII character and +# a diacritic, or 1 if the mapping is indirect. For example, consider the +# two entries: +# +# 1ECD;LATIN SMALL LETTER O WITH DOT BELOW;Ll;0;L;006F 0323;;;;N;;;1ECC;;1ECC +# 1ED9;LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW;Ll;0;L;1ECD 0302;;;;N;;;1ED8;;1ED8 +# +# The first codepoint is a direct mapping (as 006F is ASCII and 0323 is a +# diacritic). The second is an indirect mapping, as it maps to the +# first codepoint plus 0302 (a diacritic). 
+# proc rd_load_unicodedata_text {zName} { global tl_lookup_table @@ -53,18 +65,29 @@ proc rd_load_unicodedata_text {zName} { set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"] set iDia [expr "0x[lindex $character_decomposition_mapping 1]"] + # Filter out upper-case characters, as they will be mapped to their + # lower-case equivalents before this data is used. if {[info exists tl_lookup_table($iCode)]} continue + # Check if this is an indirect mapping. If so, set bIndirect to true + # and change $iAscii to the indirectly mapped ASCII character. + set bIndirect 0 + if {[info exists dia($iDia)] && [info exists mapping($iAscii)]} { + set iAscii $mapping($iAscii) + set bIndirect 1 + } + if { ($iAscii >= 97 && $iAscii <= 122) || ($iAscii >= 65 && $iAscii <= 90) } { - lappend lRet [list $iCode [string tolower [format %c $iAscii]]] + lappend lRet [list $iCode [string tolower [format %c $iAscii]] $bIndirect] + set mapping($iCode) $iAscii set dia($iDia) 1 } } foreach d [array names dia] { - lappend lRet [list $d ""] + lappend lRet [list $d "" 0] } set lRet [lsort -integer -index 0 $lRet] diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index af2bc222f2..35526a3898 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -234,13 +234,18 @@ struct Unicode61Tokenizer { unsigned char aTokenChar[128]; /* ASCII range token characters */ char *aFold; /* Buffer to fold text into */ int nFold; /* Size of aFold[] in bytes */ - int bRemoveDiacritic; /* True if remove_diacritics=1 is set */ + int eRemoveDiacritic; /* remove_diacritics setting: one of FTS5_REMOVE_DIACRITICS_* (0, 1 or 2) */ int nException; int *aiException; unsigned char aCategory[32]; /* True for token char categories */ }; +/* Values for eRemoveDiacritic (must match internals of fts5_unicode2.c) */ +#define FTS5_REMOVE_DIACRITICS_NONE 0 +#define FTS5_REMOVE_DIACRITICS_SIMPLE 1 +#define FTS5_REMOVE_DIACRITICS_COMPLEX 2 + static int fts5UnicodeAddExceptions( Unicode61Tokenizer *p, /* Tokenizer object */ 
const char *z, /* Characters to treat as exceptions */ @@ -361,7 +366,7 @@ static int fts5UnicodeCreate( int i; memset(p, 0, sizeof(Unicode61Tokenizer)); - p->bRemoveDiacritic = 1; + p->eRemoveDiacritic = FTS5_REMOVE_DIACRITICS_SIMPLE; p->nFold = 64; p->aFold = sqlite3_malloc(p->nFold * sizeof(char)); if( p->aFold==0 ){ @@ -382,10 +387,15 @@ static int fts5UnicodeCreate( for(i=0; rc==SQLITE_OK && ieRemoveDiacritic = (zArg[0] - '0'); + assert( p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_NONE + || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_SIMPLE + || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_COMPLEX + ); } - p->bRemoveDiacritic = (zArg[0]=='1'); }else if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){ rc = fts5UnicodeAddExceptions(p, zArg, 1); @@ -499,7 +509,7 @@ static int fts5UnicodeTokenize( READ_UTF8(zCsr, zTerm, iCode); if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){ non_ascii_tokenchar: - iCode = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic); + iCode = sqlite3Fts5UnicodeFold(iCode, p->eRemoveDiacritic); if( iCode ) WRITE_UTF8(zOut, iCode); }else{ break; diff --git a/ext/fts5/fts5_unicode2.c b/ext/fts5/fts5_unicode2.c index 8c48aaa49b..9d3ffdc972 100644 --- a/ext/fts5/fts5_unicode2.c +++ b/ext/fts5/fts5_unicode2.c @@ -28,32 +28,47 @@ ** E"). The resuls of passing a codepoint that corresponds to an ** uppercase letter are undefined. 
*/ -static int fts5_remove_diacritic(int c){ +static int fts5_remove_diacritic(int c, int bComplex){ unsigned short aDia[] = { 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, - 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, - 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, - 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, - 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, - 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, - 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, - 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, - 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, - 62924, 63050, 63082, 63274, 63390, + 3456, 3696, 3712, 3728, 3744, 3766, 3832, 3896, + 3912, 3928, 3944, 3968, 4008, 4040, 4056, 4106, + 4138, 4170, 4202, 4234, 4266, 4296, 4312, 4344, + 4408, 4424, 4442, 4472, 4488, 4504, 6148, 6198, + 6264, 6280, 6360, 6429, 6505, 6529, 61448, 61468, + 61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704, + 61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914, + 61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218, + 62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554, + 62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766, + 62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118, + 63182, 63242, 63274, 63310, 63368, 63390, }; char aChar[] = { - '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c', - 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r', - 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o', - 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r', - 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0', - '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h', - 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't', - 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', - 'e', 'i', 'o', 'u', 'y', + '\0', 'a'|0x00, 
'c'|0x00, 'e'|0x00, 'i'|0x00, 'n'|0x00, + 'o'|0x00, 'u'|0x00, 'y'|0x00, 'y'|0x00, 'a'|0x00, 'c'|0x00, + 'd'|0x00, 'e'|0x00, 'e'|0x00, 'g'|0x00, 'h'|0x00, 'i'|0x00, + 'j'|0x00, 'k'|0x00, 'l'|0x00, 'n'|0x00, 'o'|0x00, 'r'|0x00, + 's'|0x00, 't'|0x00, 'u'|0x00, 'u'|0x00, 'w'|0x00, 'y'|0x00, + 'z'|0x00, 'o'|0x00, 'u'|0x00, 'a'|0x00, 'i'|0x00, 'o'|0x00, + 'u'|0x00, 'u'|0x80, 'a'|0x80, 'g'|0x00, 'k'|0x00, 'o'|0x00, + 'o'|0x80, 'j'|0x00, 'g'|0x00, 'n'|0x00, 'a'|0x80, 'a'|0x00, + 'e'|0x00, 'i'|0x00, 'o'|0x00, 'r'|0x00, 'u'|0x00, 's'|0x00, + 't'|0x00, 'h'|0x00, 'a'|0x00, 'e'|0x00, 'o'|0x80, 'o'|0x00, + 'o'|0x80, 'y'|0x00, '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', 'a'|0x00, 'b'|0x00, + 'c'|0x80, 'd'|0x00, 'd'|0x00, 'e'|0x80, 'e'|0x00, 'e'|0x80, + 'f'|0x00, 'g'|0x00, 'h'|0x00, 'h'|0x00, 'i'|0x00, 'i'|0x80, + 'k'|0x00, 'l'|0x00, 'l'|0x80, 'l'|0x00, 'm'|0x00, 'n'|0x00, + 'o'|0x80, 'p'|0x00, 'r'|0x00, 'r'|0x80, 'r'|0x00, 's'|0x00, + 's'|0x80, 't'|0x00, 'u'|0x00, 'u'|0x80, 'v'|0x00, 'w'|0x00, + 'w'|0x00, 'x'|0x00, 'y'|0x00, 'z'|0x00, 'h'|0x00, 't'|0x00, + 'w'|0x00, 'y'|0x00, 'a'|0x00, 'a'|0x80, 'a'|0x80, 'a'|0x80, + 'e'|0x00, 'e'|0x80, 'e'|0x80, 'i'|0x00, 'o'|0x00, 'o'|0x80, + 'o'|0x80, 'o'|0x80, 'u'|0x00, 'u'|0x80, 'u'|0x80, 'y'|0x00, }; unsigned int key = (((unsigned int)c)<<3) | 0x00000007; @@ -70,7 +85,8 @@ static int fts5_remove_diacritic(int c){ } } assert( key>=aDia[iRes] ); - return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); + if( bComplex==0 && (aChar[iRes] & 0x80) ) return c; + return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F); } @@ -97,7 +113,7 @@ int sqlite3Fts5UnicodeIsdiacritic(int c){ ** The results are undefined if the value passed to this function ** is less than zero. */ -int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){ +int sqlite3Fts5UnicodeFold(int c, int eRemoveDiacritic){ /* Each entry in the following array defines a rule for folding a range ** of codepoints to lower case. 
The rule applies to a range of nRange ** codepoints starting at codepoint iCode. @@ -220,7 +236,9 @@ int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){ assert( ret>0 ); } - if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret); + if( eRemoveDiacritic ){ + ret = fts5_remove_diacritic(ret, eRemoveDiacritic==2); + } } else if( c>=66560 && c<66600 ){ @@ -230,13 +248,6 @@ int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){ return ret; } - -#if 0 -int sqlite3Fts5UnicodeNCat(void) { - return 32; -} -#endif - int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ aArray[0] = 1; switch( zCat[0] ){ @@ -756,9 +767,8 @@ void sqlite3Fts5UnicodeAscii(u8 *aArray, u8 *aAscii){ int bToken = aArray[ aFts5UnicodeData[iTbl] & 0x1F ]; int n = (aFts5UnicodeData[iTbl] >> 5) + i; for(; i<128 && inErr==0 && !db->mallocFailed ){ /* A minimum of one cursor is required if autoincrement is used * See ticket [a696379c1f08866] */ - if( pParse->pAinc!=0 && pParse->nTab==0 ) pParse->nTab = 1; + assert( pParse->pAinc==0 || pParse->nTab>0 ); sqlite3VdbeMakeReady(v, pParse); pParse->rc = SQLITE_DONE; }else{ diff --git a/src/hash.c b/src/hash.c index fba9dc9f80..e725f2aa5e 100644 --- a/src/hash.c +++ b/src/hash.c @@ -72,7 +72,7 @@ static unsigned int strHashN(const char *z, int n){ /* Knuth multiplicative hashing. (Sorting & Searching, p. 510). ** 0x9e3779b1 is 2654435761 which is the closest prime number to ** (2**32)*golden_ratio, where golden_ratio = (sqrt(5) - 1)/2. 
*/ - h += sqlite3UpperToLower[z[i]]; + h += sqlite3UpperToLower[(unsigned char)z[i]]; h *= 0x9e3779b1; } return h; diff --git a/src/hash.h b/src/hash.h index 90540eda50..cf773663b7 100644 --- a/src/hash.h +++ b/src/hash.h @@ -68,6 +68,9 @@ struct HashElem { void sqlite3HashInit(Hash*); void *sqlite3HashInsert(Hash*, const char *pKey, void *pData); void *sqlite3HashFind(const Hash*, const char *pKey); +#ifdef SQLITE_ENABLE_NORMALIZE +void *sqlite3HashFindN(const Hash *pH, const char *pKey, int nKey); +#endif void sqlite3HashClear(Hash*); /* diff --git a/src/insert.c b/src/insert.c index 7a9413901f..0c036e494f 100644 --- a/src/insert.c +++ b/src/insert.c @@ -319,6 +319,7 @@ void sqlite3AutoincrementBegin(Parse *pParse){ aOp[7].p2 = memId+2; aOp[7].p1 = memId; aOp[10].p2 = memId; + if( pParse->nTab==0 ) pParse->nTab = 1; } } diff --git a/src/main.c b/src/main.c index 110c4d2a1a..42d67b1302 100644 --- a/src/main.c +++ b/src/main.c @@ -1996,6 +1996,7 @@ void *sqlite3_trace(sqlite3 *db, void(*xTrace)(void*,const char*), void *pArg){ sqlite3_mutex_enter(db->mutex); pOld = db->pTraceArg; db->mTrace = xTrace ? 
SQLITE_TRACE_LEGACY : 0; + if( db->xProfile ) db->mTrace |= SQLITE_TRACE_XPROFILE; db->xTrace = (int(*)(u32,void*,void*,void*))xTrace; db->pTraceArg = pArg; sqlite3_mutex_leave(db->mutex); @@ -2020,6 +2021,9 @@ int sqlite3_trace_v2( if( mTrace==0 ) xTrace = 0; if( xTrace==0 ) mTrace = 0; db->mTrace = mTrace; +#ifndef SQLITE_OMIT_DEPRECATED + if( db->xProfile ) db->mTrace |= SQLITE_TRACE_XPROFILE; +#endif db->xTrace = xTrace; db->pTraceArg = pArg; sqlite3_mutex_leave(db->mutex); @@ -2052,6 +2056,8 @@ void *sqlite3_profile( pOld = db->pProfileArg; db->xProfile = xProfile; db->pProfileArg = pArg; + db->mTrace &= SQLITE_TRACE_NONLEGACY_MASK; + if( db->xProfile ) db->mTrace |= SQLITE_TRACE_XPROFILE; sqlite3_mutex_leave(db->mutex); return pOld; } diff --git a/src/parse.y b/src/parse.y index 42fd181909..5aae351bc7 100644 --- a/src/parse.y +++ b/src/parse.y @@ -679,6 +679,12 @@ seltablist(A) ::= stl_prefix(A) nm(Y) dbnm(D) LP exprlist(E) RP as(Z) pNew->zName = pOld->zName; pNew->zDatabase = pOld->zDatabase; pNew->pSelect = pOld->pSelect; + if( pOld->fg.isTabFunc ){ + pNew->u1.pFuncArg = pOld->u1.pFuncArg; + pOld->u1.pFuncArg = 0; + pOld->fg.isTabFunc = 0; + pNew->fg.isTabFunc = 1; + } pOld->zName = pOld->zDatabase = 0; pOld->pSelect = 0; } diff --git a/src/pcache1.c b/src/pcache1.c index 2880c2c5e6..6df0f15d13 100644 --- a/src/pcache1.c +++ b/src/pcache1.c @@ -102,6 +102,7 @@ struct PgHdr1 { PCache1 *pCache; /* Cache that currently owns this page */ PgHdr1 *pLruNext; /* Next in LRU list of unpinned pages */ PgHdr1 *pLruPrev; /* Previous in LRU list of unpinned pages */ + /* NB: pLruPrev is only valid if pLruNext!=0 */ }; /* @@ -570,7 +571,8 @@ static PgHdr1 *pcache1PinPage(PgHdr1 *pPage){ pPage->pLruPrev->pLruNext = pPage->pLruNext; pPage->pLruNext->pLruPrev = pPage->pLruPrev; pPage->pLruNext = 0; - pPage->pLruPrev = 0; + /* pPage->pLruPrev = 0; + ** No need to clear pLruPrev as it is never accessed if pLruNext is 0 */ assert( pPage->isAnchor==0 ); assert( 
pPage->pCache->pGroup->lru.isAnchor==1 ); pPage->pCache->nRecyclable--; @@ -908,8 +910,9 @@ static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2( pPage->iKey = iKey; pPage->pNext = pCache->apHash[h]; pPage->pCache = pCache; - pPage->pLruPrev = 0; pPage->pLruNext = 0; + /* pPage->pLruPrev = 0; + ** No need to clear pLruPrev since it is not accessed when pLruNext==0 */ *(void **)pPage->page.pExtra = 0; pCache->apHash[h] = pPage; if( iKey>pCache->iMaxKey ){ @@ -1069,7 +1072,7 @@ static void pcache1Unpin( /* It is an error to call this function if the page is already ** part of the PGroup LRU list. */ - assert( pPage->pLruPrev==0 && pPage->pLruNext==0 ); + assert( pPage->pLruNext==0 ); assert( PAGE_IS_PINNED(pPage) ); if( reuseUnlikely || pGroup->nPurgeable>pGroup->nMaxPage ){ diff --git a/src/prepare.c b/src/prepare.c index 4d33f0b1e1..86afac5350 100644 --- a/src/prepare.c +++ b/src/prepare.c @@ -791,8 +791,7 @@ done: */ static int estimateNormalizedSize( const char *zSql, /* The original SQL string */ - int nSql, /* Length of original SQL string */ - u8 prepFlags /* The flags passed to sqlite3_prepare_v3() */ + int nSql /* Length of original SQL string */ ){ int nOut = nSql + 4; const char *z = zSql; @@ -847,18 +846,14 @@ static void copyNormalizedToken( } /* -** Perform normalization of the SQL contained in the prepared statement and -** store the result in the zNormSql field. The schema for the associated -** databases are consulted while performing the normalization in order to -** determine if a token appears to be an identifier. All identifiers are -** left intact in the normalized SQL and all literals are replaced with a -** single '?'. +** Compute a normalization of the SQL given by zSql[0..nSql-1]. Return +** the normalization in space obtained from sqlite3DbMalloc(). Or return +** NULL if anything goes wrong or if zSql is NULL. 
*/ -void sqlite3Normalize( +char *sqlite3Normalize( Vdbe *pVdbe, /* VM being reprepared */ const char *zSql, /* The original SQL string */ - int nSql, /* Size of the input string in bytes */ - u8 prepFlags /* The flags passed to sqlite3_prepare_v3() */ + int nSql /* Size of the input string in bytes */ ){ sqlite3 *db; /* Database handle. */ char *z; /* The output string */ @@ -873,11 +868,10 @@ void sqlite3Normalize( db = sqlite3VdbeDb(pVdbe); assert( db!=0 ); - assert( pVdbe->zNormSql==0 ); - if( zSql==0 ) return; - nZ = estimateNormalizedSize(zSql, nSql, prepFlags); + if( zSql==0 ) return 0; + nZ = estimateNormalizedSize(zSql, nSql); z = sqlite3DbMallocRawNN(db, nZ); - if( z==0 ) return; + if( z==0 ) goto normalizeError; sqlite3HashInit(&inHash); for(i=j=0; i0 && z[j-1]!=';' ){ z[j++] = ';'; } z[j] = 0; assert( jzNormSql = z; sqlite3HashClear(&inHash); + return z; + +normalizeError: + sqlite3DbFree(db, z); + sqlite3HashClear(&inHash); + return 0; } #endif /* SQLITE_ENABLE_NORMALIZE */ diff --git a/src/resolve.c b/src/resolve.c index 0c7dfc0b25..effbe646fe 100644 --- a/src/resolve.c +++ b/src/resolve.c @@ -80,7 +80,7 @@ static void resolveAlias( if( pExpr->op==TK_COLLATE ){ pDup = sqlite3ExprAddCollateString(pParse, pDup, pExpr->u.zToken); } - ExprSetProperty(pDup, EP_Alias); +// ExprSetProperty(pDup, EP_Alias); /* Before calling sqlite3ExprDelete(), set the EP_Static flag. 
This ** prevents ExprDelete() from deleting the Expr structure itself, diff --git a/src/shell.c.in b/src/shell.c.in index ad5a1498be..887c2fcf59 100644 --- a/src/shell.c.in +++ b/src/shell.c.in @@ -1007,6 +1007,7 @@ struct ShellState { u8 openMode; /* SHELL_OPEN_NORMAL, _APPENDVFS, or _ZIPFILE */ u8 doXdgOpen; /* Invoke start/open/xdg-open in output_reset() */ u8 nEqpLevel; /* Depth of the EQP output graph */ + u8 eTraceType; /* SHELL_TRACE_* value for type of trace */ unsigned mEqpLines; /* Mask of veritical lines in the EQP output graph */ int outCount; /* Revert to stdout when reaching zero */ int cnt; /* Number of records displayed so far */ @@ -1066,6 +1067,12 @@ struct ShellState { #define SHELL_OPEN_READONLY 4 /* Open a normal database read-only */ #define SHELL_OPEN_DESERIALIZE 5 /* Open using sqlite3_deserialize() */ +/* Allowed values for ShellState.eTraceType +*/ +#define SHELL_TRACE_PLAIN 0 /* Show input SQL text */ +#define SHELL_TRACE_EXPANDED 1 /* Show expanded SQL text */ +#define SHELL_TRACE_NORMALIZED 2 /* Show normalized SQL text */ + /* ** These are the allowed shellFlgs values */ @@ -3492,7 +3499,22 @@ static const char *(azHelp[]) = { ".testcase NAME Begin redirecting output to 'testcase-out.txt'", ".timeout MS Try opening locked tables for MS milliseconds", ".timer on|off Turn SQL timer on or off", - ".trace FILE|off Output each SQL statement as it is run", +#ifndef SQLITE_OMIT_TRACE + ".trace ?OPTIONS? 
Output each SQL statement as it is run", + " FILE Send output to FILE", + " stdout Send output to stdout", + " stderr Send output to stderr", + " off Disable tracing", + " --expanded Expand query parameters", +#ifdef SQLITE_ENABLE_NORMALIZE + " --normalized Normal the SQL statements", +#endif + " --plain Show SQL as it is input", + " --stmt Trace statement execution (SQLITE_TRACE_STMT)", + " --profile Profile statements (SQLITE_TRACE_PROFILE)", + " --row Trace each row (SQLITE_TRACE_ROW)", + " --close Trace connection close (SQLITE_TRACE_CLOSE)", +#endif /* SQLITE_OMIT_TRACE */ ".vfsinfo ?AUX? Information about the top-level VFS", ".vfslist List all available VFSes", ".vfsname ?AUX? Print the name of the VFS stack", @@ -3999,24 +4021,60 @@ static FILE *output_file_open(const char *zFile, int bTextMode){ return f; } -#if !defined(SQLITE_OMIT_TRACE) && !defined(SQLITE_OMIT_FLOATING_POINT) +#ifndef SQLITE_OMIT_TRACE /* ** A routine for handling output from sqlite3_trace(). */ static int sql_trace_callback( - unsigned mType, - void *pArg, - void *pP, - void *pX + unsigned mType, /* The trace type */ + void *pArg, /* The ShellState pointer */ + void *pP, /* Usually a pointer to sqlite3_stmt */ + void *pX /* Auxiliary output */ ){ - FILE *f = (FILE*)pArg; - UNUSED_PARAMETER(mType); - UNUSED_PARAMETER(pP); - if( f ){ - const char *z = (const char*)pX; - int i = strlen30(z); - while( i>0 && z[i-1]==';' ){ i--; } - utf8_printf(f, "%.*s;\n", i, z); + ShellState *p = (ShellState*)pArg; + sqlite3_stmt *pStmt; + const char *zSql; + int nSql; + if( p->traceOut==0 ) return 0; + if( mType==SQLITE_TRACE_CLOSE ){ + utf8_printf(p->traceOut, "-- closing database connection\n"); + return 0; + } + if( mType!=SQLITE_TRACE_ROW && ((const char*)pX)[0]=='-' ){ + zSql = (const char*)pX; + }else{ + pStmt = (sqlite3_stmt*)pP; + switch( p->eTraceType ){ + case SHELL_TRACE_EXPANDED: { + zSql = sqlite3_expanded_sql(pStmt); + break; + } +#ifdef SQLITE_ENABLE_NORMALIZE + case SHELL_TRACE_NORMALIZED: 
{ + zSql = sqlite3_normalized_sql(pStmt); + break; + } +#endif + default: { + zSql = sqlite3_sql(pStmt); + break; + } + } + } + if( zSql==0 ) return 0; + nSql = strlen30(zSql); + while( nSql>0 && zSql[nSql-1]==';' ){ nSql--; } + switch( mType ){ + case SQLITE_TRACE_ROW: + case SQLITE_TRACE_STMT: { + utf8_printf(p->traceOut, "%.*s;\n", nSql, zSql); + break; + } + case SQLITE_TRACE_PROFILE: { + sqlite3_int64 nNanosec = *(sqlite3_int64*)pX; + utf8_printf(p->traceOut, "%.*s; -- %lld ns\n", nSql, zSql, nNanosec); + break; + } } return 0; } @@ -7838,23 +7896,55 @@ static int do_meta_command(char *zLine, ShellState *p){ } }else +#ifndef SQLITE_OMIT_TRACE if( c=='t' && strncmp(azArg[0], "trace", n)==0 ){ + int mType = 0; + int jj; open_db(p, 0); - if( nArg!=2 ){ - raw_printf(stderr, "Usage: .trace FILE|off\n"); - rc = 1; - goto meta_command_exit; + for(jj=1; jjeTraceType = SHELL_TRACE_EXPANDED; + } +#ifdef SQLITE_ENABLE_NORMALIZE + else if( optionMatch(z, "normalized") ){ + p->eTraceType = SHELL_TRACE_NORMALIZED; + } +#endif + else if( optionMatch(z, "plain") ){ + p->eTraceType = SHELL_TRACE_PLAIN; + } + else if( optionMatch(z, "profile") ){ + mType |= SQLITE_TRACE_PROFILE; + } + else if( optionMatch(z, "row") ){ + mType |= SQLITE_TRACE_ROW; + } + else if( optionMatch(z, "stmt") ){ + mType |= SQLITE_TRACE_STMT; + } + else if( optionMatch(z, "close") ){ + mType |= SQLITE_TRACE_CLOSE; + } + else { + raw_printf(stderr, "Unknown option \"%s\" on \".trace\"\n", z); + rc = 1; + goto meta_command_exit; + } + }else{ + output_file_close(p->traceOut); + p->traceOut = output_file_open(azArg[1], 0); + } } - output_file_close(p->traceOut); - p->traceOut = output_file_open(azArg[1], 0); -#if !defined(SQLITE_OMIT_TRACE) && !defined(SQLITE_OMIT_FLOATING_POINT) if( p->traceOut==0 ){ sqlite3_trace_v2(p->db, 0, 0, 0); }else{ - sqlite3_trace_v2(p->db, SQLITE_TRACE_STMT, sql_trace_callback,p->traceOut); + if( mType==0 ) mType = SQLITE_TRACE_STMT; + sqlite3_trace_v2(p->db, mType, 
sql_trace_callback, p); } -#endif }else +#endif /* !defined(SQLITE_OMIT_TRACE) */ #if SQLITE_USER_AUTHENTICATION if( c=='u' && strncmp(azArg[0], "user", n)==0 ){ diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 52a111555d..7c43a1c667 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -1365,10 +1365,13 @@ void sqlite3CryptFunc(sqlite3_context*,int,sqlite3_value**); /* This is an extra SQLITE_TRACE macro that indicates "legacy" tracing ** in the style of sqlite3_trace() */ -#define SQLITE_TRACE_LEGACY 0x80 +#define SQLITE_TRACE_LEGACY 0x40 /* Use the legacy xTrace */ +#define SQLITE_TRACE_XPROFILE 0x80 /* Use the legacy xProfile */ #else -#define SQLITE_TRACE_LEGACY 0 +#define SQLITE_TRACE_LEGACY 0 +#define SQLITE_TRACE_XPROFILE 0 #endif /* SQLITE_OMIT_DEPRECATED */ +#define SQLITE_TRACE_NONLEGACY_MASK 0x0f /* Normal flags */ /* @@ -1429,8 +1432,10 @@ struct sqlite3 { void **aExtension; /* Array of shared library handles */ int (*xTrace)(u32,void*,void*,void*); /* Trace function */ void *pTraceArg; /* Argument to the trace function */ +#ifndef SQLITE_OMIT_DEPRECATED void (*xProfile)(void*,const char*,u64); /* Profiling function */ void *pProfileArg; /* Argument to profile function */ +#endif void *pCommitArg; /* Argument to xCommitCallback() */ int (*xCommitCallback)(void*); /* Invoked at every commit. 
*/ void *pRollbackArg; /* Argument to xRollbackCallback() */ @@ -4433,7 +4438,7 @@ int sqlite3VdbeParameterIndex(Vdbe*, const char*, int); int sqlite3TransferBindings(sqlite3_stmt *, sqlite3_stmt *); void sqlite3ParserReset(Parse*); #ifdef SQLITE_ENABLE_NORMALIZE -void sqlite3Normalize(Vdbe*, const char*, int, u8); +char *sqlite3Normalize(Vdbe*, const char*, int); #endif int sqlite3Reprepare(Vdbe*); void sqlite3ExprListCheckLength(Parse*, ExprList*, const char*); diff --git a/src/vdbeapi.c b/src/vdbeapi.c index 59327bed38..ca073589f7 100644 --- a/src/vdbeapi.c +++ b/src/vdbeapi.c @@ -62,14 +62,16 @@ static SQLITE_NOINLINE void invokeProfileCallback(sqlite3 *db, Vdbe *p){ sqlite3_int64 iNow; sqlite3_int64 iElapse; assert( p->startTime>0 ); - assert( db->xProfile!=0 || (db->mTrace & SQLITE_TRACE_PROFILE)!=0 ); + assert( (db->mTrace & (SQLITE_TRACE_PROFILE|SQLITE_TRACE_XPROFILE))!=0 ); assert( db->init.busy==0 ); assert( p->zSql!=0 ); sqlite3OsCurrentTimeInt64(db->pVfs, &iNow); iElapse = (iNow - p->startTime)*1000000; +#ifndef SQLITE_OMIT_DEPRECATED if( db->xProfile ){ db->xProfile(db->pProfileArg, p->zSql, iElapse); } +#endif if( db->mTrace & SQLITE_TRACE_PROFILE ){ db->xTrace(SQLITE_TRACE_PROFILE, db->pTraceArg, p, (void*)&iElapse); } @@ -602,7 +604,7 @@ static int sqlite3Step(Vdbe *p){ ); #ifndef SQLITE_OMIT_TRACE - if( (db->xProfile || (db->mTrace & SQLITE_TRACE_PROFILE)!=0) + if( (db->mTrace & (SQLITE_TRACE_PROFILE|SQLITE_TRACE_XPROFILE))!=0 && !db->init.busy && p->zSql ){ sqlite3OsCurrentTimeInt64(db->pVfs, &p->startTime); }else{ @@ -629,16 +631,18 @@ static int sqlite3Step(Vdbe *p){ db->nVdbeExec--; } + if( rc!=SQLITE_ROW ){ #ifndef SQLITE_OMIT_TRACE - /* If the statement completed successfully, invoke the profile callback */ - if( rc!=SQLITE_ROW ) checkProfileCallback(db, p); + /* If the statement completed successfully, invoke the profile callback */ + checkProfileCallback(db, p); #endif - if( rc==SQLITE_DONE && db->autoCommit ){ - assert( p->rc==SQLITE_OK ); 
- p->rc = doWalCallbacks(db); - if( p->rc!=SQLITE_OK ){ - rc = SQLITE_ERROR; + if( rc==SQLITE_DONE && db->autoCommit ){ + assert( p->rc==SQLITE_OK ); + p->rc = doWalCallbacks(db); + if( p->rc!=SQLITE_OK ){ + rc = SQLITE_ERROR; + } } } @@ -1708,7 +1712,11 @@ char *sqlite3_expanded_sql(sqlite3_stmt *pStmt){ */ const char *sqlite3_normalized_sql(sqlite3_stmt *pStmt){ Vdbe *p = (Vdbe *)pStmt; - return p ? p->zNormSql : 0; + if( p==0 ) return 0; + if( p->zNormSql==0 && p->zSql!=0 ){ + p->zNormSql = sqlite3Normalize(p, p->zSql, sqlite3Strlen30(p->zSql)); + } + return p->zNormSql; } #endif /* SQLITE_ENABLE_NORMALIZE */ diff --git a/src/vdbeaux.c b/src/vdbeaux.c index 85f0f7980f..ba8d2b11c2 100644 --- a/src/vdbeaux.c +++ b/src/vdbeaux.c @@ -67,7 +67,7 @@ void sqlite3VdbeSetSql(Vdbe *p, const char *z, int n, u8 prepFlags){ #ifdef SQLITE_ENABLE_NORMALIZE assert( p->zNormSql==0 ); if( p->zSql && (prepFlags & SQLITE_PREPARE_NORMALIZE)!=0 ){ - sqlite3Normalize(p, p->zSql, n, prepFlags); + p->zNormSql = sqlite3Normalize(p, p->zSql, n); assert( p->zNormSql!=0 || p->db->mallocFailed ); } #endif diff --git a/src/wal.c b/src/wal.c index c67bd9dcac..4b22ba42ef 100644 --- a/src/wal.c +++ b/src/wal.c @@ -2856,19 +2856,37 @@ void sqlite3WalEndReadTransaction(Wal *pWal){ } /* -** Search the hash tables for an entry matching page number pgno. Ignore -** any entries that lie after frame iLast within the wal file. +** Search the wal file for page pgno. If found, set *piRead to the frame that +** contains the page. Otherwise, if pgno is not in the wal file, set *piRead +** to zero. +** +** Return SQLITE_OK if successful, or an error code if an error occurs. If an +** error does occur, the final value of *piRead is undefined. 
*/ -static int walFindFrame( - Wal *pWal, - Pgno pgno, - u32 iLast, - u32 *piRead +int sqlite3WalFindFrame( + Wal *pWal, /* WAL handle */ + Pgno pgno, /* Database page number to read data for */ + u32 *piRead /* OUT: Frame number (or zero) */ ){ + u32 iRead = 0; /* If !=0, WAL frame to return data from */ + u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ int iHash; /* Used to loop through N hash tables */ - u32 iRead = 0; int iMinHash; + /* This routine is only be called from within a read transaction. */ + assert( pWal->readLock>=0 || pWal->lockError ); + + /* If the "last page" field of the wal-index header snapshot is 0, then + ** no data will be read from the wal under any circumstances. Return early + ** in this case as an optimization. Likewise, if pWal->readLock==0, + ** then the WAL is ignored by the reader so return early, as if the + ** WAL were empty. + */ + if( iLast==0 || (pWal->readLock==0 && pWal->bShmUnreliable==0) ){ + *piRead = 0; + return SQLITE_OK; + } + /* Each iteration of the following for() loop searches one ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). ** @@ -2919,48 +2937,11 @@ static int walFindFrame( if( iRead ) break; } - *piRead = iRead; - return SQLITE_OK; -} - -/* -** Search the wal file for page pgno. If found, set *piRead to the frame that -** contains the page. Otherwise, if pgno is not in the wal file, set *piRead -** to zero. -** -** Return SQLITE_OK if successful, or an error code if an error occurs. If an -** error does occur, the final value of *piRead is undefined. -*/ -int sqlite3WalFindFrame( - Wal *pWal, /* WAL handle */ - Pgno pgno, /* Database page number to read data for */ - u32 *piRead /* OUT: Frame number (or zero) */ -){ - u32 iRead = 0; /* If !=0, WAL frame to return data from */ - u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ - int rc; - - /* This routine is only be called from within a read transaction. 
*/ - assert( pWal->readLock>=0 || pWal->lockError ); - - /* If the "last page" field of the wal-index header snapshot is 0, then - ** no data will be read from the wal under any circumstances. Return early - ** in this case as an optimization. Likewise, if pWal->readLock==0, - ** then the WAL is ignored by the reader so return early, as if the - ** WAL were empty. - */ - if( iLast==0 || (pWal->readLock==0 && pWal->bShmUnreliable==0) ){ - *piRead = 0; - return SQLITE_OK; - } - - rc = walFindFrame(pWal, pgno, iLast, &iRead); - #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* If expensive assert() statements are available, do a linear search ** of the wal-index file content. Make sure the results agree with the ** result obtained using the hash indexes above. */ - if( rc==SQLITE_OK ){ + { u32 iRead2 = 0; u32 iTest; assert( pWal->bShmUnreliable || pWal->minFrame>0 ); @@ -2975,7 +2956,7 @@ int sqlite3WalFindFrame( #endif *piRead = iRead; - return rc; + return SQLITE_OK; } /* diff --git a/test/fts4umlaut.test b/test/fts4umlaut.test new file mode 100644 index 0000000000..4dd96b220c --- /dev/null +++ b/test/fts4umlaut.test @@ -0,0 +1,65 @@ +# 2018 December 3 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. 
+# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts4umlaut + +ifcapable !fts3 { + finish_test + return +} + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + CREATE VIRTUAL TABLE t2 USING fts4( + x, + tokenize=unicode61 "remove_diacritics=2" + ); +} + +foreach {tn q res1 res2} { + 1 "Hà Nội" 0 1 + 2 "Hà Noi" 1 1 + 3 "Ha Noi" 1 1 + 4 "Ha N\u1ed9i" 0 1 + 5 "Ha N\u006fi" 1 1 + 6 "Ha N\u006f\u0302i" 1 1 + 7 "Ha N\u006f\u0323\u0302i" 1 1 +} { + do_execsql_test 1.$tn.1 { + DELETE FROM t1; + INSERT INTO t1(rowid, x) VALUES (1, 'Ha Noi'); + SELECT count(*) FROM t1 WHERE t1 MATCH $q + } $res1 + do_execsql_test 1.$tn.2 { + DELETE FROM t1; + INSERT INTO t1(rowid, x) VALUES (1, $q); + SELECT count(*) FROM t1 WHERE t1 MATCH 'Ha Noi' + } $res1 + + do_execsql_test 1.$tn.2 { + DELETE FROM t2; + INSERT INTO t2(rowid, x) VALUES (1, 'Ha Noi'); + SELECT count(*) FROM t2 WHERE t2 MATCH $q + } $res2 + do_execsql_test 1.$tn.2 { + DELETE FROM t2; + INSERT INTO t2(rowid, x) VALUES (1, $q); + SELECT count(*) FROM t2 WHERE t2 MATCH 'Ha Noi' + } $res2 +} + +finish_test + diff --git a/test/json101.test b/test/json101.test index 9a93ee739f..534478df93 100644 --- a/test/json101.test +++ b/test/json101.test @@ -813,6 +813,23 @@ do_execsql_test json-14.170 { SELECT fullkey FROM json_tree('null'); } {$} - +# 2018-12-03 +# Make sure the table-valued functions contained within parentheses +# work correctly. +# +# Bug reported via private email. See TH3 for more information. 
+# +do_execsql_test json-15.100 { + SELECT * FROM JSON_EACH('{"a":1, "b":2}'); +} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}} +do_execsql_test json-15.110 { + SELECT xyz.* FROM JSON_EACH('{"a":1, "b":2}') AS xyz; +} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}} +do_execsql_test json-15.120 { + SELECT * FROM (JSON_EACH('{"a":1, "b":2}')); +} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}} +do_execsql_test json-15.130 { + SELECT xyz.* FROM (JSON_EACH('{"a":1, "b":2}')) AS xyz; +} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}} finish_test diff --git a/test/shell4.test b/test/shell4.test index 88e5e69a28..2210e74a67 100644 --- a/test/shell4.test +++ b/test/shell4.test @@ -107,14 +107,14 @@ SELECT 1; } {1 1 1} do_test shell4-2.1 { - catchcmd ":memory:" "CREATE TABLE t1(x);\n.trace" -} {1 {Usage: .trace FILE|off}} + catchcmd ":memory:" "CREATE TABLE t1(x);\n.trace --unknown" +} {1 {Unknown option "--unknown" on ".trace"}} do_test shell4-2.2 { catchcmd ":memory:" "CREATE TABLE t1(x);\n.trace off\n.trace off\n" } {0 {}} do_test shell4-2.3 { - catchcmd ":memory:" ".trace stdout\n.trace\n.trace off\n.dump\n" -} {/^1 {PRAGMA.*Usage:.*}$/} + catchcmd ":memory:" ".trace stdout\n.dump\n.trace off\n" +} {/^0 {PRAGMA.*}$/} ifcapable trace { do_test shell4-2.4 { catchcmd ":memory:" ".trace stdout\nCREATE TABLE t1(x);SELECT * FROM t1;" diff --git a/test/tabfunc01.test b/test/tabfunc01.test index dfe3190b52..49f0df889e 100644 --- a/test/tabfunc01.test +++ b/test/tabfunc01.test @@ -125,6 +125,11 @@ do_execsql_test tabfunc01-4.3 { SELECT * FROM aux1.generate_series(1,4) } {1 2 3 4} +# 2018-12-03: Fix bug reported by by private email. +do_execsql_test tabfunc01-4.4 { + SELECT * FROM (generate_series(1,5,2)) AS x LIMIT 10; +} {1 3 5} + # The next series of tests is verifying that virtual table are able # to optimize the IN operator, even on terms that are not marked "omit". 
# When the generate_series virtual table is compiled for the testfixture, diff --git a/tool/index_usage.c b/tool/index_usage.c new file mode 100644 index 0000000000..a86202425e --- /dev/null +++ b/tool/index_usage.c @@ -0,0 +1,164 @@ +/* +** 2018-12-04 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file implements a utility program used to help determine which +** indexes in a database schema are used and unused, and how often specific +** indexes are used. +*/ +#include "sqlite3.h" +#include +#include +#include +#include + +static void usage(const char *argv0){ + printf("Usage: %s DATABASE LOG\n\n", argv0); + printf( + "DATABASE is an SQLite database against which various statements\n" + "have been run. The SQL text is stored in LOG. LOG is an SQLite\n" + "database with this schema:\n" + "\n" + " CREATE TABLE sqllog(sql TEXT);\n" + "\n" + "This utility program analyzes statements contained in LOG and prints\n" + "a report showing how many times each index in DATABASE is used by the\n" + "statements in LOG.\n" + "\n" + "DATABASE only needs to contain the schema used by the statements in\n" + "LOG. The content can be removed from DATABASE.\n" + ); + printf("\nAnalysis will be done by SQLite version %s dated %.20s\n" + "checkin number %.40s. 
Different versions\n" + "of SQLite might use different indexes.\n", + sqlite3_libversion(), sqlite3_sourceid(), sqlite3_sourceid()+21); + exit(1); +} + +int main(int argc, char **argv){ + sqlite3 *db = 0; /* The main database */ + sqlite3_stmt *pStmt = 0; /* a query */ + char *zSql; + int nErr = 0; + int rc; + + if( argc!=3 ) usage(argv[0]); + rc = sqlite3_open_v2(argv[1], &db, SQLITE_OPEN_READONLY, 0); + if( rc ){ + printf("Cannot open \"%s\" for reading: %s\n", argv[1], sqlite3_errmsg(db)); + goto errorOut; + } + rc = sqlite3_prepare_v2(db, "SELECT * FROM sqlite_master", -1, &pStmt, 0); + if( rc ){ + printf("Cannot read the schema from \"%s\" - %s\n", argv[1], + sqlite3_errmsg(db)); + goto errorOut; + } + sqlite3_finalize(pStmt); + pStmt = 0; + rc = sqlite3_exec(db, + "CREATE TABLE temp.idxu(\n" + " tbl TEXT,\n" + " idx TEXT,\n" + " cnt INT,\n" + " PRIMARY KEY(idx)\n" + ") WITHOUT ROWID;", 0, 0, 0); + if( rc ){ + printf("Cannot create the result table - %s\n", + sqlite3_errmsg(db)); + goto errorOut; + } + rc = sqlite3_exec(db, + "INSERT INTO temp.idxu(tbl,idx,cnt)" + " SELECT tbl_name, name, 0 FROM sqlite_master" + " WHERE type='index' AND sql IS NOT NULL", 0, 0, 0); + + /* Open the LOG database */ + zSql = sqlite3_mprintf("ATTACH %Q AS log", argv[2]); + rc = sqlite3_exec(db, zSql, 0, 0, 0); + sqlite3_free(zSql); + if( rc ){ + printf("Cannot open the LOG database \"%s\" - %s\n", + argv[2], sqlite3_errmsg(db)); + goto errorOut; + } + rc = sqlite3_prepare_v2(db, "SELECT sql, rowid FROM log.sqllog", + -1, &pStmt, 0); + if( rc ){ + printf("Cannot read the SQLLOG table in the LOG database \"%s\" - %s\n", + argv[2], sqlite3_errmsg(db)); + goto errorOut; + } + + /* Update the counts based on LOG */ + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + const char *zLog = (const char*)sqlite3_column_text(pStmt, 0); + sqlite3_stmt *pS2; + if( zLog==0 ) continue; + zSql = sqlite3_mprintf("EXPLAIN QUERY PLAN %s", zLog); + rc = sqlite3_prepare_v2(db, zSql, -1, &pS2, 0); + 
sqlite3_free(zSql); + if( rc ){ + printf("Cannot compile LOG entry %d (%s): %s\n", + sqlite3_column_int(pStmt, 1), zLog, sqlite3_errmsg(db)); + nErr++; + }else{ + while( sqlite3_step(pS2)==SQLITE_ROW ){ + const char *zExplain = (const char*)sqlite3_column_text(pS2,3); + const char *z1, *z2; + int n; + /* printf("EXPLAIN: %s\n", zExplain); */ + z1 = strstr(zExplain, " USING INDEX "); + if( z1==0 ) continue; + z1 += 13; + for(z2=z1+1; z2[1] && z2[1]!='('; z2++){} + n = z2 - z1; + zSql = sqlite3_mprintf( + "UPDATE temp.idxu SET cnt=cnt+1 WHERE idx='%.*q'", n, z1 + ); + /* printf("sql: %s\n", zSql); */ + sqlite3_exec(db, zSql, 0, 0, 0); + sqlite3_free(zSql); + } + } + sqlite3_finalize(pS2); + } + sqlite3_finalize(pStmt); + + /* Generate the report */ + rc = sqlite3_prepare_v2(db, + "SELECT tbl, idx, cnt, " + " (SELECT group_concat(name,',') FROM pragma_index_info(idx))" + " FROM temp.idxu, main.sqlite_master" + " WHERE temp.idxu.tbl=main.sqlite_master.tbl_name" + " AND temp.idxu.idx=main.sqlite_master.name" + " ORDER BY cnt DESC, tbl, idx", + -1, &pStmt, 0); + if( rc ){ + printf("Cannot query the result table - %s\n", + sqlite3_errmsg(db)); + goto errorOut; + } + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + printf("%10d %s on %s(%s)\n", + sqlite3_column_int(pStmt, 2), + sqlite3_column_text(pStmt, 1), + sqlite3_column_text(pStmt, 0), + sqlite3_column_text(pStmt, 3)); + } + sqlite3_finalize(pStmt); + pStmt = 0; + +errorOut: + sqlite3_finalize(pStmt); + sqlite3_close(db); + return nErr; +} diff --git a/tool/lemon.c b/tool/lemon.c index 1fca8b9755..7f0e557535 100644 --- a/tool/lemon.c +++ b/tool/lemon.c @@ -4590,13 +4590,20 @@ void ReportTable( tplt_print(out,lemp,lemp->overflow,&lineno); tplt_xfer(lemp->name,in,out,&lineno); - /* Generate the table of rule information + /* Generate the tables of rule information. yyRuleInfoLhs[] and + ** yyRuleInfoNRhs[]. ** ** Note: This code depends on the fact that rules are number ** sequentually beginning with 0. 
*/ for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ - fprintf(out," { %4d, %4d }, /* (%d) ",rp->lhs->index,-rp->nrhs,i); + fprintf(out," %4d, /* (%d) ", rp->lhs->index, i); + rule_print(out, rp); + fprintf(out," */\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ + fprintf(out," %3d, /* (%d) ", -rp->nrhs, i); rule_print(out, rp); fprintf(out," */\n"); lineno++; } diff --git a/tool/lempar.c b/tool/lempar.c index 325b0e5418..94c0a3162a 100644 --- a/tool/lempar.c +++ b/tool/lempar.c @@ -686,13 +686,15 @@ static void yy_shift( yyTraceShift(yypParser, yyNewState, "Shift"); } -/* The following table contains information about every rule that -** is used during the reduce. -*/ -static const struct { - YYCODETYPE lhs; /* Symbol on the left-hand side of the rule */ - signed char nrhs; /* Negative of the number of RHS symbols in the rule */ -} yyRuleInfo[] = { +/* For rule J, yyRuleInfoLhs[J] contains the symbol on the left-hand side +** of that rule */ +static const YYCODETYPE yyRuleInfoLhs[] = { +%% +}; + +/* For rule J, yyRuleInfoNRhs[J] contains the negative of the number +** of symbols on the right-hand side of that rule. */ +static const signed char yyRuleInfoNRhs[] = { %% }; @@ -725,7 +727,7 @@ static YYACTIONTYPE yy_reduce( yymsp = yypParser->yytos; #ifndef NDEBUG if( yyTraceFILE && yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){ - yysize = yyRuleInfo[yyruleno].nrhs; + yysize = yyRuleInfoNRhs[yyruleno]; if( yysize ){ fprintf(yyTraceFILE, "%sReduce %d [%s], go to state %d.\n", yyTracePrompt, @@ -740,7 +742,7 @@ static YYACTIONTYPE yy_reduce( /* Check that the stack is large enough to grow by a single entry ** if the RHS of the rule is empty. 
This ensures that there is room ** enough on the stack to push the LHS value */ - if( yyRuleInfo[yyruleno].nrhs==0 ){ + if( yyRuleInfoNRhs[yyruleno]==0 ){ #ifdef YYTRACKMAXSTACKDEPTH if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){ yypParser->yyhwm++; @@ -782,9 +784,9 @@ static YYACTIONTYPE yy_reduce( %% /********** End reduce actions ************************************************/ }; - assert( yyruleno