mirror of
https://github.com/sqlite/sqlite.git
synced 2025-07-30 19:03:16 +03:00
Don't call ctype functions on hi-bit chars. Some platforms raise
assertions when this occurs, and it's almost certainly not the right thing to do in the first place. (CVS 3746) FossilOrigin-Name: f6c3abdc6c5e916e5366ba28fb1cd06ca3554303
This commit is contained in:
@ -177,6 +177,25 @@ static int getVarint32(const char *p, int *pi){
|
|||||||
* the previous token to make the estimate a tiny bit more precise.
|
* the previous token to make the estimate a tiny bit more precise.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* It is not safe to call isspace(), tolower(), or isalnum() on
|
||||||
|
** hi-bit-set characters. This is the same solution used in the
|
||||||
|
** tokenizer.
|
||||||
|
*/
|
||||||
|
/* TODO(shess) The snippet-generation code should be using the
|
||||||
|
** tokenizer-generated tokens rather than doing its own local
|
||||||
|
** tokenization.
|
||||||
|
*/
|
||||||
|
/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
|
||||||
|
static int safe_isspace(char c){
|
||||||
|
return (c&0x80)==0 ? isspace(c) : 0;
|
||||||
|
}
|
||||||
|
static int safe_tolower(char c){
|
||||||
|
return (c&0x80)==0 ? tolower(c) : c;
|
||||||
|
}
|
||||||
|
static int safe_isalnum(char c){
|
||||||
|
return (c&0x80)==0 ? isalnum(c) : 0;
|
||||||
|
}
|
||||||
|
|
||||||
typedef enum DocListType {
|
typedef enum DocListType {
|
||||||
DL_DOCIDS, /* docids only */
|
DL_DOCIDS, /* docids only */
|
||||||
DL_POSITIONS, /* docids + positions */
|
DL_POSITIONS, /* docids + positions */
|
||||||
@ -1536,7 +1555,7 @@ static int getToken(const char *z, int *tokenType){
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
case ' ': case '\t': case '\n': case '\f': case '\r': {
|
case ' ': case '\t': case '\n': case '\f': case '\r': {
|
||||||
for(i=1; isspace(z[i]); i++){}
|
for(i=1; safe_isspace(z[i]); i++){}
|
||||||
*tokenType = TOKEN_SPACE;
|
*tokenType = TOKEN_SPACE;
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
@ -1688,7 +1707,7 @@ static void tokenListToIdList(char **azIn){
|
|||||||
int i, j;
|
int i, j;
|
||||||
if( azIn ){
|
if( azIn ){
|
||||||
for(i=0, j=-1; azIn[i]; i++){
|
for(i=0, j=-1; azIn[i]; i++){
|
||||||
if( isalnum(azIn[i][0]) || azIn[i][1] ){
|
if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){
|
||||||
dequoteString(azIn[i]);
|
dequoteString(azIn[i]);
|
||||||
if( j>=0 ){
|
if( j>=0 ){
|
||||||
azIn[j] = azIn[i];
|
azIn[j] = azIn[i];
|
||||||
@ -1737,11 +1756,11 @@ static char *firstToken(char *zIn, char **pzTail){
|
|||||||
** s[] is t[].
|
** s[] is t[].
|
||||||
*/
|
*/
|
||||||
static int startsWith(const char *s, const char *t){
|
static int startsWith(const char *s, const char *t){
|
||||||
while( isspace(*s) ){ s++; }
|
while( safe_isspace(*s) ){ s++; }
|
||||||
while( *t ){
|
while( *t ){
|
||||||
if( tolower(*s++)!=tolower(*t++) ) return 0;
|
if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0;
|
||||||
}
|
}
|
||||||
return *s!='_' && !isalnum(*s);
|
return *s!='_' && !safe_isalnum(*s);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1853,7 +1872,7 @@ static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv,
|
|||||||
char *p;
|
char *p;
|
||||||
pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
|
pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
|
||||||
for (p = pSpec->azContentColumn[i]; *p ; ++p) {
|
for (p = pSpec->azContentColumn[i]; *p ; ++p) {
|
||||||
if( !isalnum(*p) ) *p = '_';
|
if( !safe_isalnum(*p) ) *p = '_';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2330,10 +2349,10 @@ static int wordBoundary(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for(i=1; i<=10; i++){
|
for(i=1; i<=10; i++){
|
||||||
if( isspace(zDoc[iBreak-i]) ){
|
if( safe_isspace(zDoc[iBreak-i]) ){
|
||||||
return iBreak - i + 1;
|
return iBreak - i + 1;
|
||||||
}
|
}
|
||||||
if( isspace(zDoc[iBreak+i]) ){
|
if( safe_isspace(zDoc[iBreak+i]) ){
|
||||||
return iBreak + i + 1;
|
return iBreak + i + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2346,7 +2365,7 @@ static int wordBoundary(
|
|||||||
*/
|
*/
|
||||||
static void appendWhiteSpace(StringBuffer *p){
|
static void appendWhiteSpace(StringBuffer *p){
|
||||||
if( p->len==0 ) return;
|
if( p->len==0 ) return;
|
||||||
if( isspace(p->s[p->len-1]) ) return;
|
if( safe_isspace(p->s[p->len-1]) ) return;
|
||||||
append(p, " ");
|
append(p, " ");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2354,7 +2373,7 @@ static void appendWhiteSpace(StringBuffer *p){
|
|||||||
** Remove white space from the end of the StringBuffer
|
** Remove white space from the end of the StringBuffer
|
||||||
*/
|
*/
|
||||||
static void trimWhiteSpace(StringBuffer *p){
|
static void trimWhiteSpace(StringBuffer *p){
|
||||||
while( p->len>0 && isspace(p->s[p->len-1]) ){
|
while( p->len>0 && safe_isspace(p->s[p->len-1]) ){
|
||||||
p->len--;
|
p->len--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -304,6 +304,25 @@ SQLITE_EXTENSION_INIT1
|
|||||||
# define TRACE(A)
|
# define TRACE(A)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* It is not safe to call isspace(), tolower(), or isalnum() on
|
||||||
|
** hi-bit-set characters. This is the same solution used in the
|
||||||
|
** tokenizer.
|
||||||
|
*/
|
||||||
|
/* TODO(shess) The snippet-generation code should be using the
|
||||||
|
** tokenizer-generated tokens rather than doing its own local
|
||||||
|
** tokenization.
|
||||||
|
*/
|
||||||
|
/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
|
||||||
|
static int safe_isspace(char c){
|
||||||
|
return (c&0x80)==0 ? isspace(c) : 0;
|
||||||
|
}
|
||||||
|
static int safe_tolower(char c){
|
||||||
|
return (c&0x80)==0 ? tolower(c) : c;
|
||||||
|
}
|
||||||
|
static int safe_isalnum(char c){
|
||||||
|
return (c&0x80)==0 ? isalnum(c) : 0;
|
||||||
|
}
|
||||||
|
|
||||||
typedef enum DocListType {
|
typedef enum DocListType {
|
||||||
DL_DOCIDS, /* docids only */
|
DL_DOCIDS, /* docids only */
|
||||||
DL_POSITIONS, /* docids + positions */
|
DL_POSITIONS, /* docids + positions */
|
||||||
@ -504,7 +523,7 @@ static void appendList(StringBuffer *sb, int nString, char **azString){
|
|||||||
|
|
||||||
static int endsInWhiteSpace(StringBuffer *p){
|
static int endsInWhiteSpace(StringBuffer *p){
|
||||||
return stringBufferLength(p)>0 &&
|
return stringBufferLength(p)>0 &&
|
||||||
isspace(stringBufferData(p)[stringBufferLength(p)-1]);
|
safe_isspace(stringBufferData(p)[stringBufferLength(p)-1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the StringBuffer ends in something other than white space, add a
|
/* If the StringBuffer ends in something other than white space, add a
|
||||||
@ -2194,7 +2213,7 @@ static int getToken(const char *z, int *tokenType){
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
case ' ': case '\t': case '\n': case '\f': case '\r': {
|
case ' ': case '\t': case '\n': case '\f': case '\r': {
|
||||||
for(i=1; isspace(z[i]); i++){}
|
for(i=1; safe_isspace(z[i]); i++){}
|
||||||
*tokenType = TOKEN_SPACE;
|
*tokenType = TOKEN_SPACE;
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
@ -2346,7 +2365,7 @@ static void tokenListToIdList(char **azIn){
|
|||||||
int i, j;
|
int i, j;
|
||||||
if( azIn ){
|
if( azIn ){
|
||||||
for(i=0, j=-1; azIn[i]; i++){
|
for(i=0, j=-1; azIn[i]; i++){
|
||||||
if( isalnum(azIn[i][0]) || azIn[i][1] ){
|
if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){
|
||||||
dequoteString(azIn[i]);
|
dequoteString(azIn[i]);
|
||||||
if( j>=0 ){
|
if( j>=0 ){
|
||||||
azIn[j] = azIn[i];
|
azIn[j] = azIn[i];
|
||||||
@ -2395,11 +2414,11 @@ static char *firstToken(char *zIn, char **pzTail){
|
|||||||
** s[] is t[].
|
** s[] is t[].
|
||||||
*/
|
*/
|
||||||
static int startsWith(const char *s, const char *t){
|
static int startsWith(const char *s, const char *t){
|
||||||
while( isspace(*s) ){ s++; }
|
while( safe_isspace(*s) ){ s++; }
|
||||||
while( *t ){
|
while( *t ){
|
||||||
if( tolower(*s++)!=tolower(*t++) ) return 0;
|
if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0;
|
||||||
}
|
}
|
||||||
return *s!='_' && !isalnum(*s);
|
return *s!='_' && !safe_isalnum(*s);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2511,7 +2530,7 @@ static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv,
|
|||||||
char *p;
|
char *p;
|
||||||
pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
|
pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
|
||||||
for (p = pSpec->azContentColumn[i]; *p ; ++p) {
|
for (p = pSpec->azContentColumn[i]; *p ; ++p) {
|
||||||
if( !isalnum(*p) ) *p = '_';
|
if( !safe_isalnum(*p) ) *p = '_';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2971,10 +2990,10 @@ static int wordBoundary(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for(i=1; i<=10; i++){
|
for(i=1; i<=10; i++){
|
||||||
if( isspace(zDoc[iBreak-i]) ){
|
if( safe_isspace(zDoc[iBreak-i]) ){
|
||||||
return iBreak - i + 1;
|
return iBreak - i + 1;
|
||||||
}
|
}
|
||||||
if( isspace(zDoc[iBreak+i]) ){
|
if( safe_isspace(zDoc[iBreak+i]) ){
|
||||||
return iBreak + i + 1;
|
return iBreak + i + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
18
manifest
18
manifest
@ -1,5 +1,5 @@
|
|||||||
C Assume\sthe\smalloc-failed\sflag\scannot\salready\sbe\sset\swhen\scalling\ssqlite3_errmsg(16)().\s(CVS\s3745)
|
C Don't\scall\sctype\sfunctions\son\shi-bit\schars.\s\sSome\splatforms\sraise\nassertions\swhen\sthis\soccurs,\sand\sit's\salmost\scertainly\snot\sthe\sright\nthing\sto\sdo\sin\sthe\sfirst\splace.\s(CVS\s3746)
|
||||||
D 2007-03-29T15:00:53
|
D 2007-03-29T16:30:39
|
||||||
F Makefile.in 2f2c3bf69faf0ae7b8e8af4f94f1986849034530
|
F Makefile.in 2f2c3bf69faf0ae7b8e8af4f94f1986849034530
|
||||||
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
|
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
|
||||||
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
|
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
|
||||||
@ -22,7 +22,7 @@ F ext/README.txt 913a7bd3f4837ab14d7e063304181787658b14e1
|
|||||||
F ext/fts1/README.txt 20ac73b006a70bcfd80069bdaf59214b6cf1db5e
|
F ext/fts1/README.txt 20ac73b006a70bcfd80069bdaf59214b6cf1db5e
|
||||||
F ext/fts1/ft_hash.c 3927bd880e65329bdc6f506555b228b28924921b
|
F ext/fts1/ft_hash.c 3927bd880e65329bdc6f506555b228b28924921b
|
||||||
F ext/fts1/ft_hash.h 1a35e654a235c2c662d3ca0dfc3138ad60b8b7d5
|
F ext/fts1/ft_hash.h 1a35e654a235c2c662d3ca0dfc3138ad60b8b7d5
|
||||||
F ext/fts1/fts1.c 0aab3cf20eefd38935c8f525494d689cb2785f1d
|
F ext/fts1/fts1.c 7585d9cb7ad7bcdf162936ab1fd64868f2f55ea5
|
||||||
F ext/fts1/fts1.h 6060b8f62c1d925ea8356cb1a6598073eb9159a6
|
F ext/fts1/fts1.h 6060b8f62c1d925ea8356cb1a6598073eb9159a6
|
||||||
F ext/fts1/fts1_hash.c 3196cee866edbebb1c0521e21672e6d599965114
|
F ext/fts1/fts1_hash.c 3196cee866edbebb1c0521e21672e6d599965114
|
||||||
F ext/fts1/fts1_hash.h 957d378355ed29f672cd5add012ce8b088a5e089
|
F ext/fts1/fts1_hash.h 957d378355ed29f672cd5add012ce8b088a5e089
|
||||||
@ -34,7 +34,7 @@ F ext/fts1/fulltext.h 08525a47852d1d62a0be81d3fc3fe2d23b094efd
|
|||||||
F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
|
F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
|
||||||
F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
|
F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
|
||||||
F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
|
F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
|
||||||
F ext/fts2/fts2.c de8321a2ad1edea1f0dd223cb86cf008451784a4
|
F ext/fts2/fts2.c 2e3cb46d28b0dd17b2ad3b48409618ace73caec6
|
||||||
F ext/fts2/fts2.h bbdab26d34f91974d5b9ade8b7836c140a7c4ce1
|
F ext/fts2/fts2.h bbdab26d34f91974d5b9ade8b7836c140a7c4ce1
|
||||||
F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b
|
F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b
|
||||||
F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e
|
F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e
|
||||||
@ -214,6 +214,7 @@ F test/fts1e.test 77244843e925560b5a0b70069c3e7ab62f181ed2
|
|||||||
F test/fts1f.test 2d6cb10d8b7a4e6edc321bbdb3982f1f48774714
|
F test/fts1f.test 2d6cb10d8b7a4e6edc321bbdb3982f1f48774714
|
||||||
F test/fts1i.test 6bfe08cdfdced063a39a50c8601da65e6274d879
|
F test/fts1i.test 6bfe08cdfdced063a39a50c8601da65e6274d879
|
||||||
F test/fts1j.test e4c0ffcd0ba2adce09c6b7b43ffd0749b5fda5c7
|
F test/fts1j.test e4c0ffcd0ba2adce09c6b7b43ffd0749b5fda5c7
|
||||||
|
F test/fts1k.test fdf295cb797ba6a2ef81ec41cb98df0ceb2e572c
|
||||||
F test/fts1porter.test d86e9c3e0c7f8ff95add6582b4b585fb4e02b96d
|
F test/fts1porter.test d86e9c3e0c7f8ff95add6582b4b585fb4e02b96d
|
||||||
F test/fts2a.test 103fc178d134c54c44c1938a4331e9e2030792d9
|
F test/fts2a.test 103fc178d134c54c44c1938a4331e9e2030792d9
|
||||||
F test/fts2b.test 964abc0236c849c07ca1ae496bb25c268ae94816
|
F test/fts2b.test 964abc0236c849c07ca1ae496bb25c268ae94816
|
||||||
@ -225,6 +226,7 @@ F test/fts2g.test c69a8ab43ec77d123976ba6cf9422d647ae63032
|
|||||||
F test/fts2h.test 223af921323b409d4b5b18ff4e51619541b174bb
|
F test/fts2h.test 223af921323b409d4b5b18ff4e51619541b174bb
|
||||||
F test/fts2i.test 1b22451d1f13f7c509baec620dc3a4a754885dd6
|
F test/fts2i.test 1b22451d1f13f7c509baec620dc3a4a754885dd6
|
||||||
F test/fts2j.test f68d7611f76309bc8b94170f3740d9fbbc061d9b
|
F test/fts2j.test f68d7611f76309bc8b94170f3740d9fbbc061d9b
|
||||||
|
F test/fts2l.test 4c53c89ce3919003765ff4fd8d98ecf724d97dd3
|
||||||
F test/func.test 019d706b2458dfdf239c74cc31143446de1ee44a
|
F test/func.test 019d706b2458dfdf239c74cc31143446de1ee44a
|
||||||
F test/hook.test 7e7645fd9a033f79cce8fdff151e32715e7ec50a
|
F test/hook.test 7e7645fd9a033f79cce8fdff151e32715e7ec50a
|
||||||
F test/in.test 369cb2aa1eab02296b4ec470732fe8c131260b1d
|
F test/in.test 369cb2aa1eab02296b4ec470732fe8c131260b1d
|
||||||
@ -444,7 +446,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
|
|||||||
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
|
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
|
||||||
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
|
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
|
||||||
F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
|
F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
|
||||||
P 3714ac173289e580a0302a5a3beac05823d92c5b
|
P 54fa22273d551e00e1abd86992ff7c62ec4e0daf
|
||||||
R 3cfcb502e90a93f72d96670b4207913a
|
R 6645d4541d0d9e478c5b564689374f5f
|
||||||
U danielk1977
|
U shess
|
||||||
Z 7e6377bdbc94cfb816f281bbc3868b86
|
Z 5e17544799ed91760b443021ffc206bc
|
||||||
|
@ -1 +1 @@
|
|||||||
54fa22273d551e00e1abd86992ff7c62ec4e0daf
|
f6c3abdc6c5e916e5366ba28fb1cd06ca3554303
|
69
test/fts1k.test
Normal file
69
test/fts1k.test
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# 2007 March 28
|
||||||
|
#
|
||||||
|
# The author disclaims copyright to this source code.
|
||||||
|
#
|
||||||
|
#*************************************************************************
|
||||||
|
# This file implements regression tests for SQLite library. The focus
|
||||||
|
# of this script is testing isspace/isalnum/tolower problems with the
|
||||||
|
# FTS1 module. Unfortunately, this code isn't a really principled set
|
||||||
|
# of tests, because it's impossible to know where new uses of these
|
||||||
|
# functions might appear.
|
||||||
|
#
|
||||||
|
# $Id: fts1k.test,v 1.1 2007/03/29 16:30:41 shess Exp $
|
||||||
|
#
|
||||||
|
|
||||||
|
set testdir [file dirname $argv0]
|
||||||
|
source $testdir/tester.tcl
|
||||||
|
|
||||||
|
# If SQLITE_ENABLE_FTS1 is not defined, omit this file.
|
||||||
|
ifcapable !fts1 {
|
||||||
|
finish_test
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
# Tests that startsWith() (calls isspace, tolower, isalnum) can handle
|
||||||
|
# hi-bit chars. parseSpec() also calls isalnum here.
|
||||||
|
do_test fts1k-1.1 {
|
||||||
|
execsql "CREATE VIRTUAL TABLE t1 USING fts1(content, \x80)"
|
||||||
|
} {}
|
||||||
|
|
||||||
|
# Additionally tests isspace() call in getToken(), and isalnum() call
|
||||||
|
# in tokenListToIdList().
|
||||||
|
do_test fts1k-1.2 {
|
||||||
|
catch {
|
||||||
|
execsql "CREATE VIRTUAL TABLE t2 USING fts1(content, tokenize \x80)"
|
||||||
|
}
|
||||||
|
sqlite3_errmsg $DB
|
||||||
|
} "unknown tokenizer: \x80"
|
||||||
|
|
||||||
|
# Additionally test final isalnum() in startsWith().
|
||||||
|
do_test fts1k-1.3 {
|
||||||
|
execsql "CREATE VIRTUAL TABLE t3 USING fts1(content, tokenize\x80)"
|
||||||
|
} {}
|
||||||
|
|
||||||
|
# The snippet-generation code has calls to isspace() which are sort of
|
||||||
|
# hard to get to. It finds convenient breakpoints by starting ~40
|
||||||
|
# chars before and after the matched term, and scanning ~10 chars
|
||||||
|
# around that position for isspace() characters. The long word with
|
||||||
|
# embedded hi-bit chars causes one of these isspace() calls to be
|
||||||
|
# exercised. The version with a couple extra spaces should cause the
|
||||||
|
# other isspace() call to be exercised. [Both cases have been tested
|
||||||
|
# in the debugger, but I'm hoping to continue to catch it if simple
|
||||||
|
# constant changes change things slightly.]
|
||||||
|
#
|
||||||
|
# The trailing and leading hi-bit chars help with code which tests for
|
||||||
|
# isspace() to coalesce multiple spaces.
|
||||||
|
|
||||||
|
set word "\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80"
|
||||||
|
set phrase1 "$word $word $word target $word $word $word"
|
||||||
|
set phrase2 "$word $word $word target $word $word $word"
|
||||||
|
|
||||||
|
db eval {CREATE VIRTUAL TABLE t4 USING fts1(content)}
|
||||||
|
db eval "INSERT INTO t4 (content) VALUES ('$phrase1')"
|
||||||
|
db eval "INSERT INTO t4 (content) VALUES ('$phrase2')"
|
||||||
|
|
||||||
|
do_test fts1k-1.4 {
|
||||||
|
execsql {SELECT rowid, length(snippet(t4)) FROM t4 WHERE t4 MATCH 'target'}
|
||||||
|
} {1 111 2 117}
|
||||||
|
|
||||||
|
finish_test
|
69
test/fts2l.test
Normal file
69
test/fts2l.test
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# 2007 March 28
|
||||||
|
#
|
||||||
|
# The author disclaims copyright to this source code.
|
||||||
|
#
|
||||||
|
#*************************************************************************
|
||||||
|
# This file implements regression tests for SQLite library. The focus
|
||||||
|
# of this script is testing isspace/isalnum/tolower problems with the
|
||||||
|
# FTS2 module. Unfortunately, this code isn't a really principled set
|
||||||
|
# of tests, because it's impossible to know where new uses of these
|
||||||
|
# functions might appear.
|
||||||
|
#
|
||||||
|
# $Id: fts2l.test,v 1.1 2007/03/29 16:30:41 shess Exp $
|
||||||
|
#
|
||||||
|
|
||||||
|
set testdir [file dirname $argv0]
|
||||||
|
source $testdir/tester.tcl
|
||||||
|
|
||||||
|
# If SQLITE_ENABLE_FTS2 is not defined, omit this file.
|
||||||
|
ifcapable !fts2 {
|
||||||
|
finish_test
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
# Tests that startsWith() (calls isspace, tolower, isalnum) can handle
|
||||||
|
# hi-bit chars. parseSpec() also calls isalnum here.
|
||||||
|
do_test fts2l-1.1 {
|
||||||
|
execsql "CREATE VIRTUAL TABLE t1 USING fts2(content, \x80)"
|
||||||
|
} {}
|
||||||
|
|
||||||
|
# Additionally tests isspace() call in getToken(), and isalnum() call
|
||||||
|
# in tokenListToIdList().
|
||||||
|
do_test fts2l-1.2 {
|
||||||
|
catch {
|
||||||
|
execsql "CREATE VIRTUAL TABLE t2 USING fts2(content, tokenize \x80)"
|
||||||
|
}
|
||||||
|
sqlite3_errmsg $DB
|
||||||
|
} "unknown tokenizer: \x80"
|
||||||
|
|
||||||
|
# Additionally test final isalnum() in startsWith().
|
||||||
|
do_test fts2l-1.3 {
|
||||||
|
execsql "CREATE VIRTUAL TABLE t3 USING fts2(content, tokenize\x80)"
|
||||||
|
} {}
|
||||||
|
|
||||||
|
# The snippet-generation code has calls to isspace() which are sort of
|
||||||
|
# hard to get to. It finds convenient breakpoints by starting ~40
|
||||||
|
# chars before and after the matched term, and scanning ~10 chars
|
||||||
|
# around that position for isspace() characters. The long word with
|
||||||
|
# embedded hi-bit chars causes one of these isspace() calls to be
|
||||||
|
# exercised. The version with a couple extra spaces should cause the
|
||||||
|
# other isspace() call to be exercised. [Both cases have been tested
|
||||||
|
# in the debugger, but I'm hoping to continue to catch it if simple
|
||||||
|
# constant changes change things slightly.]
|
||||||
|
#
|
||||||
|
# The trailing and leading hi-bit chars help with code which tests for
|
||||||
|
# isspace() to coalesce multiple spaces.
|
||||||
|
|
||||||
|
set word "\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80"
|
||||||
|
set phrase1 "$word $word $word target $word $word $word"
|
||||||
|
set phrase2 "$word $word $word target $word $word $word"
|
||||||
|
|
||||||
|
db eval {CREATE VIRTUAL TABLE t4 USING fts2(content)}
|
||||||
|
db eval "INSERT INTO t4 (content) VALUES ('$phrase1')"
|
||||||
|
db eval "INSERT INTO t4 (content) VALUES ('$phrase2')"
|
||||||
|
|
||||||
|
do_test fts2l-1.4 {
|
||||||
|
execsql {SELECT rowid, length(snippet(t4)) FROM t4 WHERE t4 MATCH 'target'}
|
||||||
|
} {1 111 2 117}
|
||||||
|
|
||||||
|
finish_test
|
Reference in New Issue
Block a user