From 6024772ba292a9abc6810dd0b12767d02b47ccf1 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 1 Jan 2015 16:46:10 +0000 Subject: [PATCH] Add a version of the unicode61 tokenizer to fts5. FossilOrigin-Name: d09f7800cf14f73ea86d037107ef80295b2c173a --- ext/fts3/unicode/mkunicode.tcl | 50 ++- ext/fts5/fts5_tcl.c | 66 +++- ext/fts5/fts5_tokenize.c | 283 +++++++++++++++- ext/fts5/fts5_unicode2.c | 363 ++++++++++++++++++++ ext/fts5/test/fts5unicode.test | 39 +++ ext/fts5/test/fts5unicode2.test | 567 ++++++++++++++++++++++++++++++++ main.mk | 4 + manifest | 55 ++-- manifest.uuid | 2 +- 9 files changed, 1369 insertions(+), 60 deletions(-) create mode 100644 ext/fts5/fts5_unicode2.c create mode 100644 ext/fts5/test/fts5unicode.test create mode 100644 ext/fts5/test/fts5unicode2.test diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl index 2da17c51a5..f1adb5ffde 100644 --- a/ext/fts3/unicode/mkunicode.tcl +++ b/ext/fts3/unicode/mkunicode.tcl @@ -732,8 +732,12 @@ proc print_fileheader {} { */ }] puts "" - puts "#if defined(SQLITE_ENABLE_FTS4_UNICODE61)" - puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)" + if {$::generate_fts5_code} { + puts "#if defined(SQLITE_ENABLE_FTS5)" + } else { + puts "#if defined(SQLITE_ENABLE_FTS4_UNICODE61)" + puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)" + } puts "" puts "#include " puts "" @@ -760,22 +764,38 @@ proc print_test_main {} { # our liking. # proc usage {} { - puts -nonewline stderr "Usage: $::argv0 ?-test? " + puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? " puts stderr " " exit 1 } -if {[llength $argv]!=2 && [llength $argv]!=3} usage -if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage +if {[llength $argv]<2} usage set unicodedata.txt [lindex $argv end] set casefolding.txt [lindex $argv end-1] -set generate_test_code [expr {[llength $argv]==3}] + +set generate_test_code 0 +set generate_fts5_code 0 +set function_prefix "sqlite3Fts" +for {set i 0} {$i < [llength $argv]-2} {incr i} { + switch -- [lindex $argv $i] { + -test { + set generate_test_code 1 + } + -fts5 { + set function_prefix sqlite3Fts5 + set generate_fts5_code 1 + } + default { + usage + } + } +} print_fileheader # Print the isalnum() function to stdout. # set lRange [an_load_separator_ranges] -print_isalnum sqlite3FtsUnicodeIsalnum $lRange +print_isalnum ${function_prefix}UnicodeIsalnum $lRange # Leave a gap between the two generated C functions. # @@ -790,22 +810,26 @@ set mappings [rd_load_unicodedata_text ${unicodedata.txt}] print_rd $mappings puts "" puts "" -print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings +print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings puts "" puts "" # Print the fold() function to stdout. # -print_fold sqlite3FtsUnicodeFold +print_fold ${function_prefix}UnicodeFold # Print the test routines and main() function to stdout, if -test # was specified. # if {$::generate_test_code} { - print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange - print_fold_test sqlite3FtsUnicodeFold $mappings + print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange + print_fold_test ${function_prefix}UnicodeFold $mappings print_test_main } -puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */" -puts "#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */" +if {$generate_fts5_code} { + puts "#endif /* defined(SQLITE_ENABLE_FTS5) */" +} else { + puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */" + puts "#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */" +} diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index 575f4f871a..1ce1bba49d 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -518,16 +518,31 @@ static int f5tCreateFunction( return TCL_OK; } +typedef struct F5tTokenizeCtx F5tTokenizeCtx; +struct F5tTokenizeCtx { + Tcl_Obj *pRet; + int bSubst; + const char *zInput; +}; + static int xTokenizeCb2( void *pCtx, const char *zToken, int nToken, int iStart, int iEnd, int iPos ){ - Tcl_Obj *pRet = (Tcl_Obj*)pCtx; - Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); - Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iStart)); - Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iEnd)); - Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); + F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx; + if( p->bSubst ){ + Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iPos)); + Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken)); + Tcl_ListObjAppendElement( + 0, p->pRet, Tcl_NewStringObj(&p->zInput[iStart], iEnd-iStart) + ); + }else{ + Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken)); + Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iStart)); + Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iEnd)); + Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iPos)); + } return SQLITE_OK; } @@ -543,7 +558,6 @@ static int f5tTokenize( int objc, Tcl_Obj *CONST objv[] ){ - char *zName; char *zText; int nText; sqlite3 *db = 0; @@ -554,21 +568,39 @@ static int f5tTokenize( void *pUserdata; int rc; - if( objc!=4 ){ - Tcl_WrongNumArgs(interp, 1, objv, "DB NAME TEXT"); + int nArg; + const char **azArg; + F5tTokenizeCtx ctx; + + if( objc!=4 && objc!=5 ){ + Tcl_WrongNumArgs(interp, 1, objv, "?-subst? DB NAME TEXT"); return TCL_ERROR; } - if( f5tDbAndApi(interp, objv[1], &db, &pApi) ) return TCL_ERROR; - zName = Tcl_GetString(objv[2]); - zText = Tcl_GetStringFromObj(objv[3], &nText); + if( objc==5 ){ + char *zOpt = Tcl_GetString(objv[1]); + if( strcmp("-subst", zOpt) ){ + Tcl_AppendResult(interp, "unrecognized option: ", zOpt, 0); + return TCL_ERROR; + } + } + if( f5tDbAndApi(interp, objv[objc-3], &db, &pApi) ) return TCL_ERROR; + if( Tcl_SplitList(interp, Tcl_GetString(objv[objc-2]), &nArg, &azArg) ){ + return TCL_ERROR; + } + if( nArg==0 ){ + Tcl_AppendResult(interp, "no such tokenizer: ", 0); + Tcl_Free((void*)azArg); + return TCL_ERROR; + } + zText = Tcl_GetStringFromObj(objv[objc-1], &nText); - rc = pApi->xFindTokenizer(pApi, zName, &pUserdata, &tokenizer); + rc = pApi->xFindTokenizer(pApi, azArg[0], &pUserdata, &tokenizer); if( rc!=SQLITE_OK ){ - Tcl_AppendResult(interp, "no such tokenizer: ", zName, 0); + Tcl_AppendResult(interp, "no such tokenizer: ", azArg[0], 0); return TCL_ERROR; } - rc = tokenizer.xCreate(pUserdata, 0, 0, &pTok); + rc = tokenizer.xCreate(pUserdata, &azArg[1], nArg-1, &pTok); if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "error in tokenizer.xCreate()", 0); return TCL_ERROR; @@ -576,7 +608,10 @@ static int f5tTokenize( pRet = Tcl_NewObj(); Tcl_IncrRefCount(pRet); - rc = tokenizer.xTokenize(pTok, pRet, zText, nText, xTokenizeCb2); + ctx.bSubst = (objc==5); + ctx.pRet = pRet; + ctx.zInput = zText; + rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2); tokenizer.xDelete(pTok); if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0); @@ -585,6 +620,7 @@ static int f5tTokenize( } + Tcl_Free((void*)azArg); Tcl_SetObjResult(interp, pRet); Tcl_DecrRefCount(pRet); return TCL_OK; diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index 5352faa2c6..b23eccd97f 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -15,6 +15,9 @@ #include #include +/************************************************************************** +** Start of unicode61 tokenizer implementation. +*/ /* ** Create a "simple" tokenizer. @@ -69,7 +72,7 @@ static int fts5SimpleTokenize( const char *pText, int nText, int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos) ){ - int rc; + int rc = SQLITE_OK; int ie; int is = 0; int iPos = 0; @@ -78,7 +81,7 @@ static int fts5SimpleTokenize( int nFold = sizeof(aFold); char *pFold = aFold; - do { + while( is=0xc0 ){ \ + c = sqlite3Utf8Trans1[c-0xc0]; \ + while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ + c = (c<<6) + (0x3f & *(zIn++)); \ + } \ + if( c<0x80 \ + || (c&0xFFFFF800)==0xD800 \ + || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ + } + +#define WRITE_UTF8(zOut, c) { \ + if( c<0x00080 ){ \ + *zOut++ = (unsigned char)(c&0xFF); \ + } \ + else if( c<0x00800 ){ \ + *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F); \ + *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \ + } \ + else if( c<0x10000 ){ \ + *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F); \ + *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \ + *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \ + }else{ \ + *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07); \ + *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F); \ + *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \ + *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \ + } \ +} + +#endif /* ifndef SQLITE_AMALGAMATION */ + +typedef struct Unicode61Tokenizer Unicode61Tokenizer; +struct Unicode61Tokenizer { + int bRemoveDiacritic; /* True if remove_diacritics=1 is set */ + int nException; + int *aiException; +}; + +static int fts5UnicodeAddExceptions( + Unicode61Tokenizer *p, /* Tokenizer object */ + const char *z, /* Characters to treat as exceptions */ + int bTokenChars /* 1 for 'tokenchars', 0 for 'separators' */ +){ + int rc = SQLITE_OK; + int n = strlen(z); + int *aNew; + + if( n>0 ){ + aNew = (int*)sqlite3_realloc(p->aiException, (n+p->nException)*sizeof(int)); + if( aNew ){ + int nNew = p->nException; + const unsigned char *zCsr = (const unsigned char*)z; + const unsigned char *zTerm = (const unsigned char*)&z[n]; + while( zCsriCode ) break; + } + memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int)); + aNew[i] = iCode; + nNew++; + } + } + p->aiException = aNew; + p->nException = nNew; + }else{ + rc = SQLITE_NOMEM; + } + } + + return rc; +} + +/* +** Return true if the p->aiException[] array contains the value iCode. +*/ +static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){ + if( p->nException>0 ){ + int *a = p->aiException; + int iLo = 0; + int iHi = p->nException-1; + + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( iCode==a[iTest] ){ + return 1; + }else if( iCode>a[iTest] ){ + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + } + + return 0; +} + +/* +** Create a "unicode61" tokenizer. +*/ +static int fts5UnicodeCreate( + void *pCtx, + const char **azArg, int nArg, + Fts5Tokenizer **ppOut +){ + int rc = SQLITE_OK; /* Return code */ + Unicode61Tokenizer *p = 0; /* New tokenizer object */ + + if( nArg%2 ){ + rc = SQLITE_ERROR; + }else{ + p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer)); + if( p ){ + int i; + memset(p, 0, sizeof(Unicode61Tokenizer)); + p->bRemoveDiacritic = 1; + for(i=0; rc==SQLITE_OK && ibRemoveDiacritic = (zArg[0]=='1'); + }else + if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){ + rc = fts5UnicodeAddExceptions(p, zArg, 1); + }else + if( 0==sqlite3_stricmp(azArg[i], "separators") ){ + rc = fts5UnicodeAddExceptions(p, zArg, 0); + }else{ + rc = SQLITE_ERROR; + } + } + }else{ + rc = SQLITE_NOMEM; + } + *ppOut = (Fts5Tokenizer*)p; + } + return rc; +} + +/* +** Delete a "unicode61" tokenizer. +*/ +static void fts5UnicodeDelete(Fts5Tokenizer *pTok){ + Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok; + sqlite3_free(p->aiException); + sqlite3_free(p); + return; +} + +/* +** Return true if, for the purposes of tokenizing with the tokenizer +** passed as the first argument, codepoint iCode is considered a token +** character (not a separator). +*/ +static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){ + assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); + return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode); +} + +/* +** Tokenize some text using a unicode61 tokenizer. +*/ +static int fts5UnicodeTokenize( + Fts5Tokenizer *pTokenizer, + void *pCtx, + const char *pText, int nText, + int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos) +){ + Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; + const unsigned char *zInput = (const unsigned char*)pText; + const unsigned char *zTerm = &zInput[nText]; + const unsigned char *z = zInput; + int rc = SQLITE_OK; + int nBuf = 0; + unsigned char *zBuf = 0; + unsigned char *zOut = 0; + int iPos = 0; + + while( rc==SQLITE_OK && zzBuf ){ + bAlnum = sqlite3Fts5UnicodeIsdiacritic(iCode); + } + + if( bAlnum ){ + int iOut; + + /* Grow the output buffer if required */ + while( (zOut-zBuf)+4>=nBuf ){ + unsigned char *zNew; + nBuf = (nBuf ? nBuf*2 : 128); + zNew = sqlite3_realloc(zBuf, nBuf); + if( zNew==0 ){ + rc = SQLITE_NOMEM; + goto tokenize_finished; + }else{ + zOut = &zNew[zOut-zBuf]; + zBuf = zNew; + } + } + + /* Write the new character to it */ + iOut = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic); + if( iOut ) WRITE_UTF8(zOut, iOut); + } + + if( zOut>zBuf && (bAlnum==0 || z>=zTerm) ){ + int ie = (bAlnum ? z : zCode) - zInput; + rc = xToken(pCtx, (const char*)zBuf, zOut-zBuf, zStart-zInput, ie, iPos); + zOut = zBuf; + iPos++; + } + } + + tokenize_finished: + sqlite3_free(zBuf); + return rc; +} + /************************************************************************** ** Start of porter2 stemmer implementation. */ @@ -477,8 +749,9 @@ int sqlite3Fts5TokenizerInit(fts5_api *pApi){ const char *zName; fts5_tokenizer x; } aBuiltin[] = { - { "porter", { fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize } }, - { "simple", { fts5SimpleCreate, fts5SimpleDelete, fts5SimpleTokenize } } + { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }}, + { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}}, + { "simple", {fts5SimpleCreate, fts5SimpleDelete, fts5SimpleTokenize }} }; int rc = SQLITE_OK; /* Return code */ diff --git a/ext/fts5/fts5_unicode2.c b/ext/fts5/fts5_unicode2.c new file mode 100644 index 0000000000..5692bf2b39 --- /dev/null +++ b/ext/fts5/fts5_unicode2.c @@ -0,0 +1,363 @@ +/* +** 2012 May 25 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +*/ + +/* +** DO NOT EDIT THIS MACHINE GENERATED FILE. +*/ + +#if defined(SQLITE_ENABLE_FTS5) + +#include + +/* +** Return true if the argument corresponds to a unicode codepoint +** classified as either a letter or a number. Otherwise false. +** +** The results are undefined if the value passed to this function +** is less than zero. +*/ +int sqlite3Fts5UnicodeIsalnum(int c){ + /* Each unsigned integer in the following array corresponds to a contiguous + ** range of unicode codepoints that are not either letters or numbers (i.e. + ** codepoints for which this function should return 0). + ** + ** The most significant 22 bits in each 32-bit value contain the first + ** codepoint in the range. The least significant 10 bits are used to store + ** the size of the range (always at least 1). In other words, the value + ** ((C<<22) + N) represents a range of N codepoints starting with codepoint + ** C. It is not possible to represent a range larger than 1023 codepoints + ** using this format. + */ + const static unsigned int aEntry[] = { + 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, + 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, + 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401, + 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01, + 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01, + 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802, + 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F, + 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401, + 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804, + 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403, + 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812, + 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001, + 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802, + 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805, + 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401, + 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03, + 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807, + 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001, + 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01, + 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804, + 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001, + 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802, + 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01, + 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06, + 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007, + 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006, + 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417, + 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14, + 0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07, + 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01, + 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001, + 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802, + 0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F, + 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002, + 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802, + 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006, + 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D, + 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802, + 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027, + 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403, + 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805, + 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04, + 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401, + 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005, + 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B, + 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A, + 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001, + 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59, + 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807, + 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01, + 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E, + 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100, + 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10, + 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402, + 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804, + 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012, + 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004, + 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002, + 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803, + 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07, + 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02, + 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802, + 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013, + 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06, + 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003, + 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01, + 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403, + 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009, + 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003, + 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003, + 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E, + 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046, + 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401, + 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401, + 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, + 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C, + 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002, + 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, + 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, + 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, + 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, + 0x380400F0, + }; + static const unsigned int aAscii[4] = { + 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, + }; + + if( c<128 ){ + return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); + }else if( c<(1<<22) ){ + unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; + int iRes; + int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; + int iLo = 0; + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( key >= aEntry[iTest] ){ + iRes = iTest; + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + assert( aEntry[0]=aEntry[iRes] ); + return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); + } + return 1; +} + + +/* +** If the argument is a codepoint corresponding to a lowercase letter +** in the ASCII range with a diacritic added, return the codepoint +** of the ASCII letter only. For example, if passed 235 - "LATIN +** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER +** E"). The resuls of passing a codepoint that corresponds to an +** uppercase letter are undefined. +*/ +static int remove_diacritic(int c){ + unsigned short aDia[] = { + 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, + 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, + 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, + 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, + 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, + 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, + 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, + 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, + 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, + 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, + 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, + 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, + 62924, 63050, 63082, 63274, 63390, + }; + char aChar[] = { + '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c', + 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r', + 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o', + 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r', + 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h', + 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't', + 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', + 'e', 'i', 'o', 'u', 'y', + }; + + unsigned int key = (((unsigned int)c)<<3) | 0x00000007; + int iRes = 0; + int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; + int iLo = 0; + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( key >= aDia[iTest] ){ + iRes = iTest; + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + assert( key>=aDia[iRes] ); + return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); +}; + + +/* +** Return true if the argument interpreted as a unicode codepoint +** is a diacritical modifier character. +*/ +int sqlite3Fts5UnicodeIsdiacritic(int c){ + unsigned int mask0 = 0x08029FDF; + unsigned int mask1 = 0x000361F8; + if( c<768 || c>817 ) return 0; + return (c < 768+32) ? + (mask0 & (1 << (c-768))) : + (mask1 & (1 << (c-768-32))); +} + + +/* +** Interpret the argument as a unicode codepoint. If the codepoint +** is an upper case character that has a lower case equivalent, +** return the codepoint corresponding to the lower case version. +** Otherwise, return a copy of the argument. +** +** The results are undefined if the value passed to this function +** is less than zero. +*/ +int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){ + /* Each entry in the following array defines a rule for folding a range + ** of codepoints to lower case. The rule applies to a range of nRange + ** codepoints starting at codepoint iCode. + ** + ** If the least significant bit in flags is clear, then the rule applies + ** to all nRange codepoints (i.e. all nRange codepoints are upper case and + ** need to be folded). Or, if it is set, then the rule only applies to + ** every second codepoint in the range, starting with codepoint C. + ** + ** The 7 most significant bits in flags are an index into the aiOff[] + ** array. If a specific codepoint C does require folding, then its lower + ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). + ** + ** The contents of this array are generated by parsing the CaseFolding.txt + ** file distributed as part of the "Unicode Character Database". See + ** http://www.unicode.org for details. + */ + static const struct TableEntry { + unsigned short iCode; + unsigned char flags; + unsigned char nRange; + } aEntry[] = { + {65, 14, 26}, {181, 64, 1}, {192, 14, 23}, + {216, 14, 7}, {256, 1, 48}, {306, 1, 6}, + {313, 1, 16}, {330, 1, 46}, {376, 116, 1}, + {377, 1, 6}, {383, 104, 1}, {385, 50, 1}, + {386, 1, 4}, {390, 44, 1}, {391, 0, 1}, + {393, 42, 2}, {395, 0, 1}, {398, 32, 1}, + {399, 38, 1}, {400, 40, 1}, {401, 0, 1}, + {403, 42, 1}, {404, 46, 1}, {406, 52, 1}, + {407, 48, 1}, {408, 0, 1}, {412, 52, 1}, + {413, 54, 1}, {415, 56, 1}, {416, 1, 6}, + {422, 60, 1}, {423, 0, 1}, {425, 60, 1}, + {428, 0, 1}, {430, 60, 1}, {431, 0, 1}, + {433, 58, 2}, {435, 1, 4}, {439, 62, 1}, + {440, 0, 1}, {444, 0, 1}, {452, 2, 1}, + {453, 0, 1}, {455, 2, 1}, {456, 0, 1}, + {458, 2, 1}, {459, 1, 18}, {478, 1, 18}, + {497, 2, 1}, {498, 1, 4}, {502, 122, 1}, + {503, 134, 1}, {504, 1, 40}, {544, 110, 1}, + {546, 1, 18}, {570, 70, 1}, {571, 0, 1}, + {573, 108, 1}, {574, 68, 1}, {577, 0, 1}, + {579, 106, 1}, {580, 28, 1}, {581, 30, 1}, + {582, 1, 10}, {837, 36, 1}, {880, 1, 4}, + {886, 0, 1}, {902, 18, 1}, {904, 16, 3}, + {908, 26, 1}, {910, 24, 2}, {913, 14, 17}, + {931, 14, 9}, {962, 0, 1}, {975, 4, 1}, + {976, 140, 1}, {977, 142, 1}, {981, 146, 1}, + {982, 144, 1}, {984, 1, 24}, {1008, 136, 1}, + {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1}, + {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1}, + {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32}, + {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1}, + {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38}, + {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1}, + {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1}, + {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6}, + {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6}, + {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8}, + {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2}, + {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1}, + {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2}, + {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2}, + {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2}, + {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1}, + {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16}, + {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47}, + {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1}, + {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1}, + {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1}, + {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2}, + {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, + {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, + {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, + {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, + {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, + {65313, 14, 26}, + }; + static const unsigned short aiOff[] = { + 1, 2, 8, 15, 16, 26, 28, 32, + 37, 38, 40, 48, 63, 64, 69, 71, + 79, 80, 116, 202, 203, 205, 206, 207, + 209, 210, 211, 213, 214, 217, 218, 219, + 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, + 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, + 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, + 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, + 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, + 65514, 65521, 65527, 65528, 65529, + }; + + int ret = c; + + assert( c>=0 ); + assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); + + if( c<128 ){ + if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); + }else if( c<65536 ){ + int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; + int iLo = 0; + int iRes = -1; + + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + int cmp = (c - aEntry[iTest].iCode); + if( cmp>=0 ){ + iRes = iTest; + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + assert( iRes<0 || c>=aEntry[iRes].iCode ); + + if( iRes>=0 ){ + const struct TableEntry *p = &aEntry[iRes]; + if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ + ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; + assert( ret>0 ); + } + } + + if( bRemoveDiacritic ) ret = remove_diacritic(ret); + } + + else if( c>=66560 && c<66600 ){ + ret = c + 40; + } + + return ret; +} +#endif /* defined(SQLITE_ENABLE_FTS5) */ diff --git a/ext/fts5/test/fts5unicode.test b/ext/fts5/test/fts5unicode.test new file mode 100644 index 0000000000..22082b9cde --- /dev/null +++ b/ext/fts5/test/fts5unicode.test @@ -0,0 +1,39 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on the fts5 tokenizers +# + +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} +source $testdir/tester.tcl +set testprefix fts5unicode + +proc tokenize_test {tn tokenizer input output} { + uplevel [list do_test $tn [subst -nocommands { + set ret {} + foreach {z s e p} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] { + lappend ret [set z] + } + set ret + }] [list {*}$output]] +} + +foreach {tn t} {1 simple 2 unicode61} { + tokenize_test 1.$tn.0 $t {A B C D} {a b c d} + tokenize_test 1.$tn.1 $t {May you share freely,} {may you share freely} + tokenize_test 1.$tn.2 $t {..May...you.shAre.freely} {may you share freely} + tokenize_test 1.$tn.3 $t {} {} +} + +finish_test + diff --git a/ext/fts5/test/fts5unicode2.test b/ext/fts5/test/fts5unicode2.test new file mode 100644 index 0000000000..b26795f8a9 --- /dev/null +++ b/ext/fts5/test/fts5unicode2.test @@ -0,0 +1,567 @@ +# 2012 May 25 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# The tests in this file focus on testing the "unicode" FTS tokenizer. +# +# This is a modified copy of FTS4 test file "fts4_unicode.test". +# + +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} +source $testdir/tester.tcl +set testprefix fts5unicode2 + +proc do_unicode_token_test {tn input res} { + uplevel [list do_test $tn [list \ + sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input + ] [list {*}$res]] +} + +proc do_unicode_token_test2 {tn input res} { + uplevel [list do_test $tn [list \ + sqlite3_fts5_tokenize -subst db "unicode61" $input + ] [list {*}$res]] +} + +proc do_unicode_token_test3 {tn args} { + set tokenizer [concat unicode61 {*}[lrange $args 0 end-2]] + set input [lindex $args end-1] + set res [lindex $args end] + uplevel [list do_test $tn [list \ + sqlite3_fts5_tokenize -subst db $tokenizer $input + ] [list {*}$res]] +} + +do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D} + +do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \ + "0 \uE4 \uC4 1 \uF6 \uD6 2 \uFC \uDC" + +do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \ + "0 x\uE4x x\uC4x 1 x\uF6x x\uD6x 2 x\uFCx x\uDCx" + +# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s. +do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF" +do_unicode_token_test 1.4 "\u1E9E" "0 \uDF \u1E9E" + +do_unicode_token_test 1.5 "The quick brown fox" { + 0 the The 1 quick quick 2 brown brown 3 fox fox +} +do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" { + 0 the The 1 quick quick 2 brown brown 3 fox fox +} + +do_unicode_token_test2 1.7 {a B c D} {0 a a 1 b B 2 c c 3 d D} +do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "0 a \uC4 1 o \uD6 2 u \uDC" + +do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \ + "0 xax x\uC4x 1 xox x\uD6x 2 xux x\uDCx" + +# Check that diacritics are removed if remove_diacritics=1 is specified. +# And that they do not break tokens. +do_unicode_token_test2 1.10 "xx\u0301xx" "0 xxxx xx\u301xx" + +# Title-case mappings work +do_unicode_token_test 1.11 "\u01c5" "0 \u01c6 \u01c5" + +#------------------------------------------------------------------------- +# +set docs [list { + Enhance the INSERT syntax to allow multiple rows to be inserted via the + VALUES clause. +} { + Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause. +} { + Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp(). +} { + Added the sqlite3_db_readonly() interface. +} { + Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the + ability to add new PRAGMA statements or to override built-in PRAGMAs. +} { + Queries of the form: "SELECT max(x), y FROM table" returns the value of y on + the same row that contains the maximum x value. +} { + Added support for the FTS4 languageid option. +} { + Documented support for the FTS4 content option. This feature has actually + been in the code since version 3.7.9 but is only now considered to be + officially supported. +} { + Pending statements no longer block ROLLBACK. Instead, the pending statement + will return SQLITE_ABORT upon next access after the ROLLBACK. +} { + Improvements to the handling of CSV inputs in the command-line shell +} { + Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be + incorrectly converted into an INNER JOIN if the WHERE clause indexable terms + connected by OR. +}] + +set map(a) [list "\u00C4" "\u00E4"] ; # LATIN LETTER A WITH DIAERESIS +set map(e) [list "\u00CB" "\u00EB"] ; # LATIN LETTER E WITH DIAERESIS +set map(i) [list "\u00CF" "\u00EF"] ; # LATIN LETTER I WITH DIAERESIS +set map(o) [list "\u00D6" "\u00F6"] ; # LATIN LETTER O WITH DIAERESIS +set map(u) [list "\u00DC" "\u00FC"] ; # LATIN LETTER U WITH DIAERESIS +set map(y) [list "\u0178" "\u00FF"] ; # LATIN LETTER Y WITH DIAERESIS +set map(h) [list "\u1E26" "\u1E27"] ; # LATIN LETTER H WITH DIAERESIS +set map(w) [list "\u1E84" "\u1E85"] ; # LATIN LETTER W WITH DIAERESIS +set map(x) [list "\u1E8C" "\u1E8D"] ; # LATIN LETTER X WITH DIAERESIS +foreach k [array names map] { + lappend mappings [string toupper $k] [lindex $map($k) 0] + lappend mappings $k [lindex $map($k) 1] +} +proc mapdoc {doc} { + set doc [regsub -all {[[:space:]]+} $doc " "] + string map $::mappings [string trim $doc] +} + +do_test 2.0 { + execsql { CREATE VIRTUAL TABLE t2 USING fts5(tokenize=unicode61, x); } + foreach doc $docs { + set d [mapdoc $doc] + execsql { INSERT INTO t2 VALUES($d) } + } +} {} + +do_test 2.1 { + set q [mapdoc "row"] + execsql { SELECT * FROM t2 WHERE t2 MATCH $q } +} [list [mapdoc { + Queries of the form: "SELECT max(x), y FROM table" returns the value of y on + the same row that contains the maximum x value. +}]] + +foreach {tn query snippet} { + 2 "row" { + ...returns the value of y on the same [row] that contains + the maximum x value. + } + 3 "ROW" { + ...returns the value of y on the same [row] that contains + the maximum x value. + } + 4 "rollback" { + ...[ROLLBACK]. Instead, the pending statement + will return SQLITE_ABORT upon next access after the [ROLLBACK]. + } + 5 "rOllback" { + ...[ROLLBACK]. Instead, the pending statement + will return SQLITE_ABORT upon next access after the [ROLLBACK]. + } + 6 "lang*" { + Added support for the FTS4 [languageid] option. + } +} { + do_test 2.$tn { + set q [mapdoc $query] + execsql { + SELECT snippet(t2, -1, '[', ']', '...', 15) FROM t2 WHERE t2 MATCH $q + } + } [list [mapdoc $snippet]] +} + +#------------------------------------------------------------------------- +# Make sure the unicode61 tokenizer does not crash if it is passed a +# NULL pointer. +reset_db +do_execsql_test 3.1 { + CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x, y); + INSERT INTO t1 VALUES(NULL, 'a b c'); +} + +do_execsql_test 3.2 { + SELECT snippet(t1, -1, '[', ']', '...', 15) FROM t1 WHERE t1 MATCH 'b' +} {{a [b] c}} + +do_execsql_test 3.3 { + BEGIN; + DELETE FROM t1; + INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b'); + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 VALUES('a b c', NULL); + INSERT INTO t1 VALUES('a x c', NULL); + COMMIT; +} + +do_execsql_test 3.4 { + SELECT * FROM t1 WHERE t1 MATCH 'a b'; +} {{a b c} {}} + +#------------------------------------------------------------------------- +# +reset_db + +do_test 4.1 { + set a "abc\uFFFEdef" + set b "abc\uD800def" + set c "\uFFFEdef" + set d "\uD800def" + execsql { + CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x); + INSERT INTO t1 VALUES($a); + INSERT INTO t1 VALUES($b); + INSERT INTO t1 VALUES($c); + INSERT INTO t1 VALUES($d); + } +} {} + +do_test 4.2 { + set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}] + set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}] + set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] + set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] + execsql { + INSERT INTO t1 VALUES($a); + INSERT INTO t1 VALUES($b); + INSERT INTO t1 VALUES($c); + INSERT INTO t1 VALUES($d); + } +} {} + +do_test 4.3 { + set a [binary format c* {0xF7 0xBF 0xBF 0xBF}] + set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}] + set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}] + set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}] + execsql { + INSERT INTO t1 VALUES($a); + INSERT INTO t1 VALUES($b); + INSERT INTO t1 VALUES($c); + INSERT INTO t1 VALUES($d); + } +} {} + + +#------------------------------------------------------------------------- + +breakpoint +do_unicode_token_test3 5.1 {tokenchars {}} { + sqlite3_reset sqlite3_column_int +} { + 0 sqlite3 sqlite3 + 1 reset reset + 2 sqlite3 sqlite3 + 3 column column + 4 int int +} + +do_unicode_token_test3 5.2 {tokenchars _} { + sqlite3_reset sqlite3_column_int +} { + 0 sqlite3_reset sqlite3_reset + 1 sqlite3_column_int sqlite3_column_int +} + +do_unicode_token_test3 5.3 {separators xyz} { + Laotianxhorseyrunszfast +} { + 0 laotian Laotian + 1 horse horse + 2 runs runs + 3 fast fast +} + +do_unicode_token_test3 5.4 {tokenchars xyz} { + Laotianxhorseyrunszfast +} { + 0 laotianxhorseyrunszfast Laotianxhorseyrunszfast +} + +do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} { + sqlite3_resetxsqlite3_column_intyhonda_phantom +} { + 0 sqlite3_reset sqlite3_reset + 1 sqlite3_column_int sqlite3_column_int + 2 honda_phantom honda_phantom +} + +do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" { + 0 abc abc 1 def def +} + +do_unicode_token_test3 5.7 \ + "tokenchars \u2444\u2445" \ + "separators \u05D0\u05D1\u05D2" \ + "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \ + [list \ + 0 \u2444fre\u2445sh \u2444fre\u2445sh \ + 1 water water \ + 2 fish fish \ + 3 \u2445timer \u2445timer \ + ] + +# Check that it is not possible to add a standalone diacritic codepoint +# to either separators or tokenchars. +do_unicode_token_test3 5.8 "separators \u0301" \ + "hello\u0301world \u0301helloworld" \ + "0 helloworld hello\u0301world 1 helloworld helloworld" + +do_unicode_token_test3 5.9 "tokenchars \u0301" \ + "hello\u0301world \u0301helloworld" \ + "0 helloworld hello\u0301world 1 helloworld helloworld" + +do_unicode_token_test3 5.10 "separators \u0301" \ + "remove_diacritics 0" \ + "hello\u0301world \u0301helloworld" \ + "0 hello\u0301world hello\u0301world 1 helloworld helloworld" + +do_unicode_token_test3 5.11 "tokenchars \u0301" \ + "remove_diacritics 0" \ + "hello\u0301world \u0301helloworld" \ + "0 hello\u0301world hello\u0301world 1 helloworld helloworld" + + +#------------------------------------------------------------------------- + +proc do_tokenize {tokenizer txt} { + set res [list] + foreach {a b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] { + lappend res $b + } + set res +} + +# Argument $lCodepoint must be a list of codepoints (integers) that +# correspond to whitespace characters. This command creates a string +# $W from the codepoints, then tokenizes "${W}hello{$W}world${W}" +# using tokenizer $tokenizer. The test passes if the tokenizer successfully +# extracts the two 5 character tokens. +# +proc do_isspace_test {tn tokenizer lCp} { + set whitespace [format [string repeat %c [llength $lCp]] {*}$lCp] + set txt "${whitespace}hello${whitespace}world${whitespace}" + uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}] +} + +set tokenizers [list unicode61] +ifcapable icu { lappend tokenizers icu } + +# Some tests to check that the tokenizers can both identify white-space +# codepoints. All codepoints tested below are of type "Zs" in the +# UnicodeData.txt file. +foreach T $tokenizers { + do_isspace_test 6.$T.1 $T 32 + do_isspace_test 6.$T.2 $T 160 + do_isspace_test 6.$T.3 $T 5760 + do_isspace_test 6.$T.4 $T 6158 + do_isspace_test 6.$T.5 $T 8192 + do_isspace_test 6.$T.6 $T 8193 + do_isspace_test 6.$T.7 $T 8194 + do_isspace_test 6.$T.8 $T 8195 + do_isspace_test 6.$T.9 $T 8196 + do_isspace_test 6.$T.10 $T 8197 + do_isspace_test 6.$T.11 $T 8198 + do_isspace_test 6.$T.12 $T 8199 + do_isspace_test 6.$T.13 $T 8200 + do_isspace_test 6.$T.14 $T 8201 + do_isspace_test 6.$T.15 $T 8202 + do_isspace_test 6.$T.16 $T 8239 + do_isspace_test 6.$T.17 $T 8287 + do_isspace_test 6.$T.18 $T 12288 + + do_isspace_test 6.$T.19 $T {32 160 5760 6158} + do_isspace_test 6.$T.20 $T {8192 8193 8194 8195} + do_isspace_test 6.$T.21 $T {8196 8197 8198 8199} + do_isspace_test 6.$T.22 $T {8200 8201 8202 8239} + do_isspace_test 6.$T.23 $T {8287 12288} +} + +#------------------------------------------------------------------------- +# Test that the private use ranges are treated as alphanumeric. +# +foreach {tn1 c} { + 1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff +} { + foreach {tn2 config res} { + 1 "" "0 hello*world hello*world" + 2 "separators *" "0 hello hello 1 world world" + } { + set config [string map [list * $c] $config] + set input [string map [list * $c] "hello*world"] + set output [string map [list * $c] $res] + do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output + } +} + +#------------------------------------------------------------------------- +# Cursory test of remove_diacritics=0. +# +# 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS +# 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS +# 00E4;LATIN SMALL LETTER A WITH DIAERESIS +# 00F6;LATIN SMALL LETTER O WITH DIAERESIS +# +do_execsql_test 8.1.1 " + CREATE VIRTUAL TABLE t3 USING fts5( + content, tokenize='unicode61 remove_diacritics 1' + ); + INSERT INTO t3 VALUES('o'); + INSERT INTO t3 VALUES('a'); + INSERT INTO t3 VALUES('O'); + INSERT INTO t3 VALUES('A'); + INSERT INTO t3 VALUES('\xD6'); + INSERT INTO t3 VALUES('\xC4'); + INSERT INTO t3 VALUES('\xF6'); + INSERT INTO t3 VALUES('\xE4'); +" +do_execsql_test 8.1.2 { + SELECT rowid FROM t3 WHERE t3 MATCH 'o' ORDER BY rowid ASC; +} {1 3 5 7} +do_execsql_test 8.1.3 { + SELECT rowid FROM t3 WHERE t3 MATCH 'a' ORDER BY rowid ASC; +} {2 4 6 8} +do_execsql_test 8.2.1 { + CREATE VIRTUAL TABLE t4 USING fts5( + content, tokenize='unicode61 remove_diacritics 0' + ); + INSERT INTO t4 SELECT * FROM t3 ORDER BY rowid ASC; +} +do_execsql_test 8.2.2 { + SELECT rowid FROM t4 WHERE t4 MATCH 'o' ORDER BY rowid ASC; +} {1 3} +do_execsql_test 8.2.3 { + SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC; +} {2 4} + +#------------------------------------------------------------------------- +# +if 0 { +foreach {tn sql} { + 1 { + CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]); + CREATE VIRTUAL TABLE t6 USING fts4( + tokenize=unicode61 [tokenchars=="] "tokenchars=[]"); + CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]); + } + 2 { + CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= ."); + CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]"); + CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4"); + } + 3 { + CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .'); + CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]'); + CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4'); + } + 4 { + CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`); + CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`); + CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`); + } +} { + do_execsql_test 9.$tn.0 { + DROP TABLE IF EXISTS t5; + DROP TABLE IF EXISTS t5aux; + DROP TABLE IF EXISTS t6; + DROP TABLE IF EXISTS t6aux; + DROP TABLE IF EXISTS t7; + DROP TABLE IF EXISTS t7aux; + } + do_execsql_test 9.$tn.1 $sql + + do_execsql_test 9.$tn.2 { + CREATE VIRTUAL TABLE t5aux USING fts4aux(t5); + INSERT INTO t5 VALUES('one two three/four.five.six'); + SELECT * FROM t5aux; + } { + four.five.six * 1 1 four.five.six 0 1 1 + {one two three} * 1 1 {one two three} 0 1 1 + } + + do_execsql_test 9.$tn.3 { + CREATE VIRTUAL TABLE t6aux USING fts4aux(t6); + INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta'); + SELECT * FROM t6aux; + } { + {alpha=beta"gamma} * 1 1 {alpha=beta"gamma} 0 1 1 + {delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1 + } + + do_execsql_test 9.$tn.4 { + CREATE VIRTUAL TABLE t7aux USING fts4aux(t7); + INSERT INTO t7 VALUES('alephxbeth\xC4gimel'); + SELECT * FROM t7aux; + } { + aleph * 1 1 aleph 0 1 1 + beth * 1 1 beth 0 1 1 + gimel * 1 1 gimel 0 1 1 + } +} + +# Check that multiple options are handled correctly. +# +do_execsql_test 10.1 { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61 + "tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy" + "separators=a" "separators=a" "tokenchars=a" "tokenchars=a" + ); + + INSERT INTO t1 VALUES('oneatwoxthreeyfour'); + INSERT INTO t1 VALUES('a.single=word'); + CREATE VIRTUAL TABLE t1aux USING fts4aux(t1); + SELECT * FROM t1aux; +} { + .single=word * 1 1 .single=word 0 1 1 + four * 1 1 four 0 1 1 + one * 1 1 one 0 1 1 + three * 1 1 three 0 1 1 + two * 1 1 two 0 1 1 +} + +# Test that case folding happens after tokenization, not before. +# +do_execsql_test 10.2 { + DROP TABLE IF EXISTS t2; + CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB"); + INSERT INTO t2 VALUES('oneatwoBthree'); + INSERT INTO t2 VALUES('onebtwoAthree'); + CREATE VIRTUAL TABLE t2aux USING fts4aux(t2); + SELECT * FROM t2aux; +} { + one * 1 1 one 0 1 1 + onebtwoathree * 1 1 onebtwoathree 0 1 1 + three * 1 1 three 0 1 1 + two * 1 1 two 0 1 1 +} + +# Test that the tokenchars and separators options work with the +# fts3tokenize table. +# +do_execsql_test 11.1 { + CREATE VIRTUAL TABLE ft1 USING fts3tokenize( + "unicode61", "tokenchars=@.", "separators=1234567890" + ); + SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road'; +} { + berlin@street sydney.road +} + +} + +finish_test diff --git a/main.mk b/main.mk index 58044218a7..7a26313b12 100644 --- a/main.mk +++ b/main.mk @@ -81,6 +81,7 @@ LIBOBJ += fts5_hash.o LIBOBJ += fts5_index.o LIBOBJ += fts5_storage.o LIBOBJ += fts5_tokenize.o +LIBOBJ += fts5_unicode2.o LIBOBJ += fts5parse.o @@ -616,6 +617,9 @@ fts5_storage.o: $(TOP)/ext/fts5/fts5_storage.c $(HDR) $(EXTHDR) fts5_tokenize.o: $(TOP)/ext/fts5/fts5_tokenize.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_tokenize.c +fts5_unicode2.o: $(TOP)/ext/fts5/fts5_unicode2.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_unicode2.c + fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon cp $(TOP)/ext/fts5/fts5parse.y . rm -f fts5parse.h diff --git a/manifest b/manifest index d8bf03435a..49fabbb73c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Move\sall\sfts5\stest\sfiles\sto\snew\sdirectory\s"ext/fts5/test". -D 2014-12-29T15:59:36.706 +C Add\sa\sversion\sof\sthe\sunicode61\stokenizer\sto\sfts5. +D 2015-01-01T16:46:10.851 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -102,7 +102,7 @@ F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 -F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 +F ext/fts3/unicode/mkunicode.tcl 2fa92b916b17ee0fc94129d36969972d463bc016 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 F ext/fts5/fts5.c 37e124e24e5860f9842e5f3ee22129a786c0fd74 F ext/fts5/fts5.h 4f9d2c477c0ee1907164642471329a82cb6b203b @@ -114,26 +114,29 @@ F ext/fts5/fts5_expr.c 27d3d2deebae277c34ae2bb3d501dd879c442ba5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f F ext/fts5/fts5_storage.c 13794781977c9a624eb8bd7b9509de241e405853 -F ext/fts5/fts5_tcl.c ce11e46589986b957b89809aabd3936d898d501b -F ext/fts5/fts5_tokenize.c 5d6e785345b0d87d174fcc0653bfacd0d9fd7f2e +F ext/fts5/fts5_tcl.c 664e710e2bbeed505cb91848772ca7538623a67f +F ext/fts5/fts5_tokenize.c 5a0ad46408d09bcda2bf0addb5af42fdb75ebabb +F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 -F ext/fts5/test/fts5aa.test 01fff9cf4e75c33871dd121d6adae33b609542cf w test/fts5aa.test -F ext/fts5/test/fts5ab.test 7a58a954cae2ae50cef3ee525c57bc8eb3eb50b3 w test/fts5ab.test -F ext/fts5/test/fts5ac.test d3de838f48d2ac8c26386832f6d93a3a3dbb5d4b w test/fts5ac.test -F ext/fts5/test/fts5ad.test a8311d6ce46964fa1686937793dd81d284317324 w test/fts5ad.test -F ext/fts5/test/fts5ae.test e576e646013489ce458a5b276caa787035efb175 w test/fts5ae.test -F ext/fts5/test/fts5af.test 7e4c679bc6337ddcde6a3c9b9d81c81d2f7e77bd w test/fts5af.test -F ext/fts5/test/fts5ag.test c79ee7707d120b79869fa2ac1538639b9fa1b997 w test/fts5ag.test -F ext/fts5/test/fts5ah.test e510c741e9833d6335c87bef2e7f93fecfcc7c1d w test/fts5ah.test -F ext/fts5/test/fts5ai.test 6a22f43776e1612591392721b535ca28d2c1a19f w test/fts5ai.test -F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3 w test/fts5aj.test -F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f w test/fts5ak.test -F ext/fts5/test/fts5al.test c055f1d682f931b8ea6c6e6251d90925f2aa55a1 w test/fts5al.test -F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05 w ext/fts5/fts5auxdata.test -F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8 w test/fts5ea.test -F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7 w test/fts5fault1.test -F ext/fts5/test/fts5porter.test d8f7591b733bcc1f02ca0dd313bc891a4b289562 w ext/fts5/fts5porter.test -F ext/fts5/test/fts5tokenizer.test a1f3128e0d42c93632122c76cbe0d07a901591ca w ext/fts5/fts5tokenizer.test +F ext/fts5/test/fts5aa.test 01fff9cf4e75c33871dd121d6adae33b609542cf +F ext/fts5/test/fts5ab.test 7a58a954cae2ae50cef3ee525c57bc8eb3eb50b3 +F ext/fts5/test/fts5ac.test d3de838f48d2ac8c26386832f6d93a3a3dbb5d4b +F ext/fts5/test/fts5ad.test a8311d6ce46964fa1686937793dd81d284317324 +F ext/fts5/test/fts5ae.test e576e646013489ce458a5b276caa787035efb175 +F ext/fts5/test/fts5af.test 7e4c679bc6337ddcde6a3c9b9d81c81d2f7e77bd +F ext/fts5/test/fts5ag.test c79ee7707d120b79869fa2ac1538639b9fa1b997 +F ext/fts5/test/fts5ah.test e510c741e9833d6335c87bef2e7f93fecfcc7c1d +F ext/fts5/test/fts5ai.test 6a22f43776e1612591392721b535ca28d2c1a19f +F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3 +F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f +F ext/fts5/test/fts5al.test c055f1d682f931b8ea6c6e6251d90925f2aa55a1 +F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05 +F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8 +F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7 +F ext/fts5/test/fts5porter.test d8f7591b733bcc1f02ca0dd313bc891a4b289562 +F ext/fts5/test/fts5tokenizer.test a1f3128e0d42c93632122c76cbe0d07a901591ca +F ext/fts5/test/fts5unicode.test b9c7bb982e0ee242a0774e636e1888ca32947a83 +F ext/fts5/test/fts5unicode2.test 7b0d64bbb7bfb7b5080e032e068404b42432ee02 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -177,7 +180,7 @@ F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 863a6f5cdcc3a47a9dcbedc9af37d3c0d4172935 +F main.mk 602303f3596d10237f25da030ee1d96065e2e5a8 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1212,7 +1215,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b33fe0dd89f3180c209fa1f9e75d0a7acab12b8e -R c65f16b94aeceea9cda28cb8f092d4a9 +P 7f148edb30103c5f4fee20cd08e38537f9615bf2 +R d01caf1e8e04bd7c1b6e26fb465c90b6 U dan -Z 822a98c34fd542b912bf890d737a0e9f +Z 5c3f4d7bf4502327dfa6eb630b5a26ec diff --git a/manifest.uuid b/manifest.uuid index 7b2535c49c..f67937770d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -7f148edb30103c5f4fee20cd08e38537f9615bf2 \ No newline at end of file +d09f7800cf14f73ea86d037107ef80295b2c173a \ No newline at end of file