1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-01 06:27:03 +03:00

Add special fast paths to sqlite3FtsUnicodeTolower() and Isalnum() for codepoints in the ASCII range.

FossilOrigin-Name: cf7b25d47687635a04f4347d45f135c686b9d758
This commit is contained in:
dan
2012-05-25 19:50:12 +00:00
parent 80ed5a56a5
commit 1c7016c9a5
5 changed files with 47 additions and 13 deletions

View File

@ -201,6 +201,7 @@ static int unicodeNext(
if( !zNew ) return SQLITE_NOMEM;
zOut = &zNew[zOut - pCsr->zToken];
pCsr->zToken = zNew;
pCsr->nAlloc += 64;
}
/* Write the folded case of the last character read to the output */

View File

@ -121,8 +121,13 @@ int sqlite3FtsUnicodeIsalnum(int c){
0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001,
0x43FFF401,
};
static const unsigned int aAscii[4] = {
0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
};
if( c<(1<<22) ){
if( c<128 ){
return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
}else if( c<(1<<22) ){
unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
int iRes;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
@ -236,7 +241,9 @@ int sqlite3FtsUnicodeTolower(int c){
assert( c>=0 );
assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
if( c<65536 ){
if( c<128 ){
if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
}else if( c<65536 ){
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
int iRes = -1;

View File

@ -105,6 +105,27 @@ proc an_print_range_array {lRange} {
puts " \};"
}
proc an_print_ascii_bitmap {lRange} {
foreach range $lRange {
foreach {iFirst nRange} $range {}
for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} {
if {$i<=127} { set a($i) 1 }
}
}
set aAscii [list 0 0 0 0]
foreach key [array names a] {
set idx [expr $key >> 5]
lset aAscii $idx [expr [lindex $aAscii $idx] | (1 << ($key&0x001F))]
}
puts " static const unsigned int aAscii\[4\] = \{"
puts -nonewline " "
foreach v $aAscii { puts -nonewline [format " 0x%08X," $v] }
puts ""
puts " \};"
}
proc print_isalnum {zFunc lRange} {
puts "/*"
puts "** Return true if the argument corresponds to a unicode codepoint"
@ -115,8 +136,11 @@ proc print_isalnum {zFunc lRange} {
puts "*/"
puts "int ${zFunc}\(int c)\{"
an_print_range_array $lRange
an_print_ascii_bitmap $lRange
puts {
if( c<(1<<22) ){
if( c<128 ){
return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
}else if( c<(1<<22) ){
unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
int iRes;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
@ -365,7 +389,9 @@ proc print_tolower {zFunc} {
assert( c>=0 );
assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
if( c<65536 ){
if( c<128 ){
if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
}else if( c<65536 ){
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
int iRes = -1;

View File

@ -1,5 +1,5 @@
C Fix\scomments\sin\sgenerated\sfile\sfts3_unicode2.c.
D 2012-05-25T18:48:48.456
C Add\sspecial\sfast\spaths\sto\ssqlite3FtsUnicodeTolower()\sand\sIsalnum()\sfor\scodepoints\sin\sthe\sASCII\srange.
D 2012-05-25T19:50:12.903
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 2f37e468503dbe79d35c9f6dffcf3fae1ae9ec20
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -70,15 +70,15 @@ F ext/fts3/fts3_test.c 348f7d08cae05285794e23dc4fe8b8fdf66e264a
F ext/fts3/fts3_tokenizer.c 3da7254a9881f7e270ab28e2004e0d22b3212bce
F ext/fts3/fts3_tokenizer.h 66dec98e365854b6cd2d54f1a96bb6d428fc5a68
F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004
F ext/fts3/fts3_unicode.c 033ee5d10d1a69613890d892829e6d3cf7177e40
F ext/fts3/fts3_unicode2.c 6989db92aff500ae9795c1b16720ff5a17bfbf0f
F ext/fts3/fts3_unicode.c f487c6ef0ac2cc35aec3c3636c0a35483da6453c
F ext/fts3/fts3_unicode2.c 75fa8f249a5c17d324969e90d7066e7021a90874
F ext/fts3/fts3_write.c cd4af00b3b0512b4d76177a267fcaafab44cbce4
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197
F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl 3ff244e41222fa5e43c60739c501131a2395b310
F ext/fts3/unicode/mkunicode.tcl a7214d1705cb57ff56fb828002fa811192a25524
F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9
F ext/icu/icu.c eb9ae1d79046bd7871aa97ee6da51eb770134b5a
F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37
@ -1004,7 +1004,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
F tool/warnings-clang.sh a8a0a3babda96dfb1ff51adda3cbbf3dfb7266c2
F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381
P 0c13570ec78c6887103dc99b81b470829fa28385
R 9b33e64eb6dd16074aa14c4bbd40c221
P 3dc567ef4702d9a63d78d11ff705cb7f7359f7a6
R a4a319ece993f7d02e2811bac8d8bd2b
U dan
Z a6323715198787ea389cc785d187a551
Z e7946b1fde61e6ce4870009a94d8c133

View File

@ -1 +1 @@
3dc567ef4702d9a63d78d11ff705cb7f7359f7a6
cf7b25d47687635a04f4347d45f135c686b9d758