1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Add a version of the unicode61 tokenizer to fts5.

FossilOrigin-Name: d09f7800cf14f73ea86d037107ef80295b2c173a
This commit is contained in:
dan
2015-01-01 16:46:10 +00:00
parent e716aca24b
commit 6024772ba2
9 changed files with 1369 additions and 60 deletions

View File

@ -732,8 +732,12 @@ proc print_fileheader {} {
*/ */
}] }]
puts "" puts ""
puts "#if defined(SQLITE_ENABLE_FTS4_UNICODE61)" if {$::generate_fts5_code} {
puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)" puts "#if defined(SQLITE_ENABLE_FTS5)"
} else {
puts "#if defined(SQLITE_ENABLE_FTS4_UNICODE61)"
puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
}
puts "" puts ""
puts "#include <assert.h>" puts "#include <assert.h>"
puts "" puts ""
@ -760,22 +764,38 @@ proc print_test_main {} {
# our liking. # our liking.
# #
proc usage {} { proc usage {} {
puts -nonewline stderr "Usage: $::argv0 ?-test? " puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? "
puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>" puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>"
exit 1 exit 1
} }
if {[llength $argv]!=2 && [llength $argv]!=3} usage if {[llength $argv]<2} usage
if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage
set unicodedata.txt [lindex $argv end] set unicodedata.txt [lindex $argv end]
set casefolding.txt [lindex $argv end-1] set casefolding.txt [lindex $argv end-1]
set generate_test_code [expr {[llength $argv]==3}]
set generate_test_code 0
set generate_fts5_code 0
set function_prefix "sqlite3Fts"
for {set i 0} {$i < [llength $argv]-2} {incr i} {
switch -- [lindex $argv $i] {
-test {
set generate_test_code 1
}
-fts5 {
set function_prefix sqlite3Fts5
set generate_fts5_code 1
}
default {
usage
}
}
}
print_fileheader print_fileheader
# Print the isalnum() function to stdout. # Print the isalnum() function to stdout.
# #
set lRange [an_load_separator_ranges] set lRange [an_load_separator_ranges]
print_isalnum sqlite3FtsUnicodeIsalnum $lRange print_isalnum ${function_prefix}UnicodeIsalnum $lRange
# Leave a gap between the two generated C functions. # Leave a gap between the two generated C functions.
# #
@ -790,22 +810,26 @@ set mappings [rd_load_unicodedata_text ${unicodedata.txt}]
print_rd $mappings print_rd $mappings
puts "" puts ""
puts "" puts ""
print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings
puts "" puts ""
puts "" puts ""
# Print the fold() function to stdout. # Print the fold() function to stdout.
# #
print_fold sqlite3FtsUnicodeFold print_fold ${function_prefix}UnicodeFold
# Print the test routines and main() function to stdout, if -test # Print the test routines and main() function to stdout, if -test
# was specified. # was specified.
# #
if {$::generate_test_code} { if {$::generate_test_code} {
print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange
print_fold_test sqlite3FtsUnicodeFold $mappings print_fold_test ${function_prefix}UnicodeFold $mappings
print_test_main print_test_main
} }
puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */" if {$generate_fts5_code} {
puts "#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */" puts "#endif /* defined(SQLITE_ENABLE_FTS5) */"
} else {
puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
puts "#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */"
}

View File

@ -518,16 +518,31 @@ static int f5tCreateFunction(
return TCL_OK; return TCL_OK;
} }
typedef struct F5tTokenizeCtx F5tTokenizeCtx;
struct F5tTokenizeCtx {
Tcl_Obj *pRet;
int bSubst;
const char *zInput;
};
static int xTokenizeCb2( static int xTokenizeCb2(
void *pCtx, void *pCtx,
const char *zToken, int nToken, const char *zToken, int nToken,
int iStart, int iEnd, int iPos int iStart, int iEnd, int iPos
){ ){
Tcl_Obj *pRet = (Tcl_Obj*)pCtx; F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx;
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); if( p->bSubst ){
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iStart)); Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iPos));
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iEnd)); Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); Tcl_ListObjAppendElement(
0, p->pRet, Tcl_NewStringObj(&p->zInput[iStart], iEnd-iStart)
);
}else{
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iStart));
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iEnd));
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iPos));
}
return SQLITE_OK; return SQLITE_OK;
} }
@ -543,7 +558,6 @@ static int f5tTokenize(
int objc, int objc,
Tcl_Obj *CONST objv[] Tcl_Obj *CONST objv[]
){ ){
char *zName;
char *zText; char *zText;
int nText; int nText;
sqlite3 *db = 0; sqlite3 *db = 0;
@ -554,21 +568,39 @@ static int f5tTokenize(
void *pUserdata; void *pUserdata;
int rc; int rc;
if( objc!=4 ){ int nArg;
Tcl_WrongNumArgs(interp, 1, objv, "DB NAME TEXT"); const char **azArg;
F5tTokenizeCtx ctx;
if( objc!=4 && objc!=5 ){
Tcl_WrongNumArgs(interp, 1, objv, "?-subst? DB NAME TEXT");
return TCL_ERROR; return TCL_ERROR;
} }
if( f5tDbAndApi(interp, objv[1], &db, &pApi) ) return TCL_ERROR; if( objc==5 ){
zName = Tcl_GetString(objv[2]); char *zOpt = Tcl_GetString(objv[1]);
zText = Tcl_GetStringFromObj(objv[3], &nText); if( strcmp("-subst", zOpt) ){
Tcl_AppendResult(interp, "unrecognized option: ", zOpt, 0);
return TCL_ERROR;
}
}
if( f5tDbAndApi(interp, objv[objc-3], &db, &pApi) ) return TCL_ERROR;
if( Tcl_SplitList(interp, Tcl_GetString(objv[objc-2]), &nArg, &azArg) ){
return TCL_ERROR;
}
if( nArg==0 ){
Tcl_AppendResult(interp, "no such tokenizer: ", 0);
Tcl_Free((void*)azArg);
return TCL_ERROR;
}
zText = Tcl_GetStringFromObj(objv[objc-1], &nText);
rc = pApi->xFindTokenizer(pApi, zName, &pUserdata, &tokenizer); rc = pApi->xFindTokenizer(pApi, azArg[0], &pUserdata, &tokenizer);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
Tcl_AppendResult(interp, "no such tokenizer: ", zName, 0); Tcl_AppendResult(interp, "no such tokenizer: ", azArg[0], 0);
return TCL_ERROR; return TCL_ERROR;
} }
rc = tokenizer.xCreate(pUserdata, 0, 0, &pTok); rc = tokenizer.xCreate(pUserdata, &azArg[1], nArg-1, &pTok);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
Tcl_AppendResult(interp, "error in tokenizer.xCreate()", 0); Tcl_AppendResult(interp, "error in tokenizer.xCreate()", 0);
return TCL_ERROR; return TCL_ERROR;
@ -576,7 +608,10 @@ static int f5tTokenize(
pRet = Tcl_NewObj(); pRet = Tcl_NewObj();
Tcl_IncrRefCount(pRet); Tcl_IncrRefCount(pRet);
rc = tokenizer.xTokenize(pTok, pRet, zText, nText, xTokenizeCb2); ctx.bSubst = (objc==5);
ctx.pRet = pRet;
ctx.zInput = zText;
rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2);
tokenizer.xDelete(pTok); tokenizer.xDelete(pTok);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0); Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0);
@ -585,6 +620,7 @@ static int f5tTokenize(
} }
Tcl_Free((void*)azArg);
Tcl_SetObjResult(interp, pRet); Tcl_SetObjResult(interp, pRet);
Tcl_DecrRefCount(pRet); Tcl_DecrRefCount(pRet);
return TCL_OK; return TCL_OK;

View File

@ -15,6 +15,9 @@
#include <string.h> #include <string.h>
#include <assert.h> #include <assert.h>
/**************************************************************************
** Start of unicode61 tokenizer implementation.
*/
/* /*
** Create a "simple" tokenizer. ** Create a "simple" tokenizer.
@ -69,7 +72,7 @@ static int fts5SimpleTokenize(
const char *pText, int nText, const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos) int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
){ ){
int rc; int rc = SQLITE_OK;
int ie; int ie;
int is = 0; int is = 0;
int iPos = 0; int iPos = 0;
@ -78,7 +81,7 @@ static int fts5SimpleTokenize(
int nFold = sizeof(aFold); int nFold = sizeof(aFold);
char *pFold = aFold; char *pFold = aFold;
do { while( is<nText && rc==SQLITE_OK ){
int nByte; int nByte;
/* Skip any leading divider characters. */ /* Skip any leading divider characters. */
@ -110,13 +113,282 @@ static int fts5SimpleTokenize(
rc = xToken(pCtx, pFold, nByte, is, ie, iPos); rc = xToken(pCtx, pFold, nByte, is, ie, iPos);
iPos++; iPos++;
is = ie+1; is = ie+1;
}while( is<nText && rc==SQLITE_OK ); }
if( pFold!=aFold ) sqlite3_free(pFold); if( pFold!=aFold ) sqlite3_free(pFold);
if( rc==SQLITE_DONE ) rc = SQLITE_OK; if( rc==SQLITE_DONE ) rc = SQLITE_OK;
return rc; return rc;
} }
/**************************************************************************
** Start of unicode61 tokenizer implementation.
*/
/*
** Functions in fts5_unicode2.c.
*/
int sqlite3Fts5UnicodeIsalnum(int c);
int sqlite3Fts5UnicodeIsdiacritic(int c);
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);
/*
** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
** from the sqlite3 source file utf.c. If this file is compiled as part
** of the amalgamation, they are not required.
*/
#ifndef SQLITE_AMALGAMATION
static const unsigned char sqlite3Utf8Trans1[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};
#define READ_UTF8(zIn, zTerm, c) \
c = *(zIn++); \
if( c>=0xc0 ){ \
c = sqlite3Utf8Trans1[c-0xc0]; \
while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \
c = (c<<6) + (0x3f & *(zIn++)); \
} \
if( c<0x80 \
|| (c&0xFFFFF800)==0xD800 \
|| (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
}
#define WRITE_UTF8(zOut, c) { \
if( c<0x00080 ){ \
*zOut++ = (unsigned char)(c&0xFF); \
} \
else if( c<0x00800 ){ \
*zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F); \
*zOut++ = 0x80 + (unsigned char)(c & 0x3F); \
} \
else if( c<0x10000 ){ \
*zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F); \
*zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \
*zOut++ = 0x80 + (unsigned char)(c & 0x3F); \
}else{ \
*zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07); \
*zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F); \
*zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \
*zOut++ = 0x80 + (unsigned char)(c & 0x3F); \
} \
}
#endif /* ifndef SQLITE_AMALGAMATION */
typedef struct Unicode61Tokenizer Unicode61Tokenizer;
struct Unicode61Tokenizer {
int bRemoveDiacritic; /* True if remove_diacritics=1 is set */
int nException;
int *aiException;
};
static int fts5UnicodeAddExceptions(
Unicode61Tokenizer *p, /* Tokenizer object */
const char *z, /* Characters to treat as exceptions */
int bTokenChars /* 1 for 'tokenchars', 0 for 'separators' */
){
int rc = SQLITE_OK;
int n = strlen(z);
int *aNew;
if( n>0 ){
aNew = (int*)sqlite3_realloc(p->aiException, (n+p->nException)*sizeof(int));
if( aNew ){
int nNew = p->nException;
const unsigned char *zCsr = (const unsigned char*)z;
const unsigned char *zTerm = (const unsigned char*)&z[n];
while( zCsr<zTerm ){
int iCode;
int bToken;
READ_UTF8(zCsr, zTerm, iCode);
bToken = sqlite3Fts5UnicodeIsalnum(iCode);
assert( (bToken==0 || bToken==1) );
assert( (bTokenChars==0 || bTokenChars==1) );
if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
int i;
for(i=0; i<nNew; i++){
if( aNew[i]>iCode ) break;
}
memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int));
aNew[i] = iCode;
nNew++;
}
}
p->aiException = aNew;
p->nException = nNew;
}else{
rc = SQLITE_NOMEM;
}
}
return rc;
}
/*
** Return true if the p->aiException[] array contains the value iCode.
*/
static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){
if( p->nException>0 ){
int *a = p->aiException;
int iLo = 0;
int iHi = p->nException-1;
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
if( iCode==a[iTest] ){
return 1;
}else if( iCode>a[iTest] ){
iLo = iTest+1;
}else{
iHi = iTest-1;
}
}
}
return 0;
}
/*
** Create a "unicode61" tokenizer.
*/
static int fts5UnicodeCreate(
void *pCtx,
const char **azArg, int nArg,
Fts5Tokenizer **ppOut
){
int rc = SQLITE_OK; /* Return code */
Unicode61Tokenizer *p = 0; /* New tokenizer object */
if( nArg%2 ){
rc = SQLITE_ERROR;
}else{
p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer));
if( p ){
int i;
memset(p, 0, sizeof(Unicode61Tokenizer));
p->bRemoveDiacritic = 1;
for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
const char *zArg = azArg[i+1];
if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
rc = SQLITE_ERROR;
}
p->bRemoveDiacritic = (zArg[0]=='1');
}else
if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
rc = fts5UnicodeAddExceptions(p, zArg, 1);
}else
if( 0==sqlite3_stricmp(azArg[i], "separators") ){
rc = fts5UnicodeAddExceptions(p, zArg, 0);
}else{
rc = SQLITE_ERROR;
}
}
}else{
rc = SQLITE_NOMEM;
}
*ppOut = (Fts5Tokenizer*)p;
}
return rc;
}
/*
** Delete a "unicode61" tokenizer.
*/
static void fts5UnicodeDelete(Fts5Tokenizer *pTok){
Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok;
sqlite3_free(p->aiException);
sqlite3_free(p);
return;
}
/*
** Return true if, for the purposes of tokenizing with the tokenizer
** passed as the first argument, codepoint iCode is considered a token
** character (not a separator).
*/
static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode);
}
/*
** Tokenize some text using a unicode61 tokenizer.
*/
static int fts5UnicodeTokenize(
Fts5Tokenizer *pTokenizer,
void *pCtx,
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
){
Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
const unsigned char *zInput = (const unsigned char*)pText;
const unsigned char *zTerm = &zInput[nText];
const unsigned char *z = zInput;
int rc = SQLITE_OK;
int nBuf = 0;
unsigned char *zBuf = 0;
unsigned char *zOut = 0;
int iPos = 0;
while( rc==SQLITE_OK && z<zTerm ){
int iCode;
int bAlnum;
const unsigned char *zStart;
const unsigned char *zCode;
if( zOut==zBuf ) zStart = z;
zCode = z;
READ_UTF8(z, zTerm, iCode);
bAlnum = fts5UnicodeIsAlnum(p, iCode);
if( bAlnum==0 && zOut>zBuf ){
bAlnum = sqlite3Fts5UnicodeIsdiacritic(iCode);
}
if( bAlnum ){
int iOut;
/* Grow the output buffer if required */
while( (zOut-zBuf)+4>=nBuf ){
unsigned char *zNew;
nBuf = (nBuf ? nBuf*2 : 128);
zNew = sqlite3_realloc(zBuf, nBuf);
if( zNew==0 ){
rc = SQLITE_NOMEM;
goto tokenize_finished;
}else{
zOut = &zNew[zOut-zBuf];
zBuf = zNew;
}
}
/* Write the new character to it */
iOut = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic);
if( iOut ) WRITE_UTF8(zOut, iOut);
}
if( zOut>zBuf && (bAlnum==0 || z>=zTerm) ){
int ie = (bAlnum ? z : zCode) - zInput;
rc = xToken(pCtx, (const char*)zBuf, zOut-zBuf, zStart-zInput, ie, iPos);
zOut = zBuf;
iPos++;
}
}
tokenize_finished:
sqlite3_free(zBuf);
return rc;
}
/************************************************************************** /**************************************************************************
** Start of porter2 stemmer implementation. ** Start of porter2 stemmer implementation.
*/ */
@ -477,8 +749,9 @@ int sqlite3Fts5TokenizerInit(fts5_api *pApi){
const char *zName; const char *zName;
fts5_tokenizer x; fts5_tokenizer x;
} aBuiltin[] = { } aBuiltin[] = {
{ "porter", { fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize } }, { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }},
{ "simple", { fts5SimpleCreate, fts5SimpleDelete, fts5SimpleTokenize } } { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}},
{ "simple", {fts5SimpleCreate, fts5SimpleDelete, fts5SimpleTokenize }}
}; };
int rc = SQLITE_OK; /* Return code */ int rc = SQLITE_OK; /* Return code */

363
ext/fts5/fts5_unicode2.c Normal file
View File

@ -0,0 +1,363 @@
/*
** 2012 May 25
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
*/
/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/
#if defined(SQLITE_ENABLE_FTS5)
#include <assert.h>
/*
** Return true if the argument corresponds to a unicode codepoint
** classified as either a letter or a number. Otherwise false.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3Fts5UnicodeIsalnum(int c){
/* Each unsigned integer in the following array corresponds to a contiguous
** range of unicode codepoints that are not either letters or numbers (i.e.
** codepoints for which this function should return 0).
**
** The most significant 22 bits in each 32-bit value contain the first
** codepoint in the range. The least significant 10 bits are used to store
** the size of the range (always at least 1). In other words, the value
** ((C<<22) + N) represents a range of N codepoints starting with codepoint
** C. It is not possible to represent a range larger than 1023 codepoints
** using this format.
*/
const static unsigned int aEntry[] = {
0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
0x380400F0,
};
static const unsigned int aAscii[4] = {
0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
};
if( c<128 ){
return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
}else if( c<(1<<22) ){
unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
int iRes;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
if( key >= aEntry[iTest] ){
iRes = iTest;
iLo = iTest+1;
}else{
iHi = iTest-1;
}
}
assert( aEntry[0]<key );
assert( key>=aEntry[iRes] );
return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
}
return 1;
}
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER
** E"). The resuls of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
static int remove_diacritic(int c){
unsigned short aDia[] = {
0, 1797, 1848, 1859, 1891, 1928, 1940, 1995,
2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286,
2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732,
2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336,
3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928,
3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234,
4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504,
6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529,
61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,
62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
62924, 63050, 63082, 63274, 63390,
};
char aChar[] = {
'\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c',
'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r',
's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o',
'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r',
'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0',
'\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h',
'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't',
'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a',
'e', 'i', 'o', 'u', 'y',
};
unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
int iRes = 0;
int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
int iLo = 0;
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
if( key >= aDia[iTest] ){
iRes = iTest;
iLo = iTest+1;
}else{
iHi = iTest-1;
}
}
assert( key>=aDia[iRes] );
return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
};
/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
*/
int sqlite3Fts5UnicodeIsdiacritic(int c){
unsigned int mask0 = 0x08029FDF;
unsigned int mask1 = 0x000361F8;
if( c<768 || c>817 ) return 0;
return (c < 768+32) ?
(mask0 & (1 << (c-768))) :
(mask1 & (1 << (c-768-32)));
}
/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){
/* Each entry in the following array defines a rule for folding a range
** of codepoints to lower case. The rule applies to a range of nRange
** codepoints starting at codepoint iCode.
**
** If the least significant bit in flags is clear, then the rule applies
** to all nRange codepoints (i.e. all nRange codepoints are upper case and
** need to be folded). Or, if it is set, then the rule only applies to
** every second codepoint in the range, starting with codepoint C.
**
** The 7 most significant bits in flags are an index into the aiOff[]
** array. If a specific codepoint C does require folding, then its lower
** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
**
** The contents of this array are generated by parsing the CaseFolding.txt
** file distributed as part of the "Unicode Character Database". See
** http://www.unicode.org for details.
*/
static const struct TableEntry {
unsigned short iCode;
unsigned char flags;
unsigned char nRange;
} aEntry[] = {
{65, 14, 26}, {181, 64, 1}, {192, 14, 23},
{216, 14, 7}, {256, 1, 48}, {306, 1, 6},
{313, 1, 16}, {330, 1, 46}, {376, 116, 1},
{377, 1, 6}, {383, 104, 1}, {385, 50, 1},
{386, 1, 4}, {390, 44, 1}, {391, 0, 1},
{393, 42, 2}, {395, 0, 1}, {398, 32, 1},
{399, 38, 1}, {400, 40, 1}, {401, 0, 1},
{403, 42, 1}, {404, 46, 1}, {406, 52, 1},
{407, 48, 1}, {408, 0, 1}, {412, 52, 1},
{413, 54, 1}, {415, 56, 1}, {416, 1, 6},
{422, 60, 1}, {423, 0, 1}, {425, 60, 1},
{428, 0, 1}, {430, 60, 1}, {431, 0, 1},
{433, 58, 2}, {435, 1, 4}, {439, 62, 1},
{440, 0, 1}, {444, 0, 1}, {452, 2, 1},
{453, 0, 1}, {455, 2, 1}, {456, 0, 1},
{458, 2, 1}, {459, 1, 18}, {478, 1, 18},
{497, 2, 1}, {498, 1, 4}, {502, 122, 1},
{503, 134, 1}, {504, 1, 40}, {544, 110, 1},
{546, 1, 18}, {570, 70, 1}, {571, 0, 1},
{573, 108, 1}, {574, 68, 1}, {577, 0, 1},
{579, 106, 1}, {580, 28, 1}, {581, 30, 1},
{582, 1, 10}, {837, 36, 1}, {880, 1, 4},
{886, 0, 1}, {902, 18, 1}, {904, 16, 3},
{908, 26, 1}, {910, 24, 2}, {913, 14, 17},
{931, 14, 9}, {962, 0, 1}, {975, 4, 1},
{976, 140, 1}, {977, 142, 1}, {981, 146, 1},
{982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
{1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
{1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
{1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
{1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
{1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
{4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
{7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
{7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
{7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
{8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
{8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
{8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
{8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
{8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
{8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
{8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
{8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
{8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
{11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
{11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
{11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
{11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
{11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
{42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
{42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
{42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
{42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
{65313, 14, 26},
};
static const unsigned short aiOff[] = {
1, 2, 8, 15, 16, 26, 28, 32,
37, 38, 40, 48, 63, 64, 69, 71,
79, 80, 116, 202, 203, 205, 206, 207,
209, 210, 211, 213, 214, 217, 218, 219,
775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
65514, 65521, 65527, 65528, 65529,
};
int ret = c;
assert( c>=0 );
assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
if( c<128 ){
if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
}else if( c<65536 ){
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
int iRes = -1;
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
int cmp = (c - aEntry[iTest].iCode);
if( cmp>=0 ){
iRes = iTest;
iLo = iTest+1;
}else{
iHi = iTest-1;
}
}
assert( iRes<0 || c>=aEntry[iRes].iCode );
if( iRes>=0 ){
const struct TableEntry *p = &aEntry[iRes];
if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
assert( ret>0 );
}
}
if( bRemoveDiacritic ) ret = remove_diacritic(ret);
}
else if( c>=66560 && c<66600 ){
ret = c + 40;
}
return ret;
}
#endif /* defined(SQLITE_ENABLE_FTS5) */

View File

@ -0,0 +1,39 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 tokenizers
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
set testprefix fts5unicode
proc tokenize_test {tn tokenizer input output} {
uplevel [list do_test $tn [subst -nocommands {
set ret {}
foreach {z s e p} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] {
lappend ret [set z]
}
set ret
}] [list {*}$output]]
}
foreach {tn t} {1 simple 2 unicode61} {
tokenize_test 1.$tn.0 $t {A B C D} {a b c d}
tokenize_test 1.$tn.1 $t {May you share freely,} {may you share freely}
tokenize_test 1.$tn.2 $t {..May...you.shAre.freely} {may you share freely}
tokenize_test 1.$tn.3 $t {} {}
}
finish_test

View File

@ -0,0 +1,567 @@
# 2012 May 25
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# The tests in this file focus on testing the "unicode" FTS tokenizer.
#
# This is a modified copy of FTS4 test file "fts4_unicode.test".
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
set testprefix fts5unicode2
proc do_unicode_token_test {tn input res} {
uplevel [list do_test $tn [list \
sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input
] [list {*}$res]]
}
proc do_unicode_token_test2 {tn input res} {
uplevel [list do_test $tn [list \
sqlite3_fts5_tokenize -subst db "unicode61" $input
] [list {*}$res]]
}
proc do_unicode_token_test3 {tn args} {
set tokenizer [concat unicode61 {*}[lrange $args 0 end-2]]
set input [lindex $args end-1]
set res [lindex $args end]
uplevel [list do_test $tn [list \
sqlite3_fts5_tokenize -subst db $tokenizer $input
] [list {*}$res]]
}
do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}
do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \
"0 \uE4 \uC4 1 \uF6 \uD6 2 \uFC \uDC"
do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \
"0 x\uE4x x\uC4x 1 x\uF6x x\uD6x 2 x\uFCx x\uDCx"
# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "0 \uDF \u1E9E"
do_unicode_token_test 1.5 "The quick brown fox" {
0 the The 1 quick quick 2 brown brown 3 fox fox
}
do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {
0 the The 1 quick quick 2 brown brown 3 fox fox
}
do_unicode_token_test2 1.7 {a B c D} {0 a a 1 b B 2 c c 3 d D}
do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "0 a \uC4 1 o \uD6 2 u \uDC"
do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \
"0 xax x\uC4x 1 xox x\uD6x 2 xux x\uDCx"
# Check that diacritics are removed if remove_diacritics=1 is specified.
# And that they do not break tokens.
do_unicode_token_test2 1.10 "xx\u0301xx" "0 xxxx xx\u301xx"
# Title-case mappings work
do_unicode_token_test 1.11 "\u01c5" "0 \u01c6 \u01c5"
#-------------------------------------------------------------------------
#
set docs [list {
Enhance the INSERT syntax to allow multiple rows to be inserted via the
VALUES clause.
} {
Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
} {
Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
} {
Added the sqlite3_db_readonly() interface.
} {
Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
ability to add new PRAGMA statements or to override built-in PRAGMAs.
} {
Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
the same row that contains the maximum x value.
} {
Added support for the FTS4 languageid option.
} {
Documented support for the FTS4 content option. This feature has actually
been in the code since version 3.7.9 but is only now considered to be
officially supported.
} {
Pending statements no longer block ROLLBACK. Instead, the pending statement
will return SQLITE_ABORT upon next access after the ROLLBACK.
} {
Improvements to the handling of CSV inputs in the command-line shell
} {
Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
incorrectly converted into an INNER JOIN if the WHERE clause indexable terms
connected by OR.
}]
set map(a) [list "\u00C4" "\u00E4"] ; # LATIN LETTER A WITH DIAERESIS
set map(e) [list "\u00CB" "\u00EB"] ; # LATIN LETTER E WITH DIAERESIS
set map(i) [list "\u00CF" "\u00EF"] ; # LATIN LETTER I WITH DIAERESIS
set map(o) [list "\u00D6" "\u00F6"] ; # LATIN LETTER O WITH DIAERESIS
set map(u) [list "\u00DC" "\u00FC"] ; # LATIN LETTER U WITH DIAERESIS
set map(y) [list "\u0178" "\u00FF"] ; # LATIN LETTER Y WITH DIAERESIS
set map(h) [list "\u1E26" "\u1E27"] ; # LATIN LETTER H WITH DIAERESIS
set map(w) [list "\u1E84" "\u1E85"] ; # LATIN LETTER W WITH DIAERESIS
set map(x) [list "\u1E8C" "\u1E8D"] ; # LATIN LETTER X WITH DIAERESIS
foreach k [array names map] {
lappend mappings [string toupper $k] [lindex $map($k) 0]
lappend mappings $k [lindex $map($k) 1]
}
proc mapdoc {doc} {
set doc [regsub -all {[[:space:]]+} $doc " "]
string map $::mappings [string trim $doc]
}
do_test 2.0 {
execsql { CREATE VIRTUAL TABLE t2 USING fts5(tokenize=unicode61, x); }
foreach doc $docs {
set d [mapdoc $doc]
execsql { INSERT INTO t2 VALUES($d) }
}
} {}
do_test 2.1 {
set q [mapdoc "row"]
execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
} [list [mapdoc {
Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
the same row that contains the maximum x value.
}]]
foreach {tn query snippet} {
2 "row" {
...returns the value of y on the same [row] that contains
the maximum x value.
}
3 "ROW" {
...returns the value of y on the same [row] that contains
the maximum x value.
}
4 "rollback" {
...[ROLLBACK]. Instead, the pending statement
will return SQLITE_ABORT upon next access after the [ROLLBACK].
}
5 "rOllback" {
...[ROLLBACK]. Instead, the pending statement
will return SQLITE_ABORT upon next access after the [ROLLBACK].
}
6 "lang*" {
Added support for the FTS4 [languageid] option.
}
} {
do_test 2.$tn {
set q [mapdoc $query]
execsql {
SELECT snippet(t2, -1, '[', ']', '...', 15) FROM t2 WHERE t2 MATCH $q
}
} [list [mapdoc $snippet]]
}
#-------------------------------------------------------------------------
# Make sure the unicode61 tokenizer does not crash if it is passed a
# NULL pointer.
reset_db
do_execsql_test 3.1 {
CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x, y);
INSERT INTO t1 VALUES(NULL, 'a b c');
}
do_execsql_test 3.2 {
SELECT snippet(t1, -1, '[', ']', '...', 15) FROM t1 WHERE t1 MATCH 'b'
} {{a [b] c}}
do_execsql_test 3.3 {
BEGIN;
DELETE FROM t1;
INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 VALUES('a b c', NULL);
INSERT INTO t1 VALUES('a x c', NULL);
COMMIT;
}
do_execsql_test 3.4 {
SELECT * FROM t1 WHERE t1 MATCH 'a b';
} {{a b c} {}}
#-------------------------------------------------------------------------
#
reset_db
do_test 4.1 {
set a "abc\uFFFEdef"
set b "abc\uD800def"
set c "\uFFFEdef"
set d "\uD800def"
execsql {
CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x);
INSERT INTO t1 VALUES($a);
INSERT INTO t1 VALUES($b);
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
}
} {}
do_test 4.2 {
set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
execsql {
INSERT INTO t1 VALUES($a);
INSERT INTO t1 VALUES($b);
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
}
} {}
do_test 4.3 {
set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]
set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]
set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]
set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]
execsql {
INSERT INTO t1 VALUES($a);
INSERT INTO t1 VALUES($b);
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
}
} {}
#-------------------------------------------------------------------------
breakpoint
do_unicode_token_test3 5.1 {tokenchars {}} {
sqlite3_reset sqlite3_column_int
} {
0 sqlite3 sqlite3
1 reset reset
2 sqlite3 sqlite3
3 column column
4 int int
}
do_unicode_token_test3 5.2 {tokenchars _} {
sqlite3_reset sqlite3_column_int
} {
0 sqlite3_reset sqlite3_reset
1 sqlite3_column_int sqlite3_column_int
}
do_unicode_token_test3 5.3 {separators xyz} {
Laotianxhorseyrunszfast
} {
0 laotian Laotian
1 horse horse
2 runs runs
3 fast fast
}
do_unicode_token_test3 5.4 {tokenchars xyz} {
Laotianxhorseyrunszfast
} {
0 laotianxhorseyrunszfast Laotianxhorseyrunszfast
}
do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} {
sqlite3_resetxsqlite3_column_intyhonda_phantom
} {
0 sqlite3_reset sqlite3_reset
1 sqlite3_column_int sqlite3_column_int
2 honda_phantom honda_phantom
}
do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" {
0 abc abc 1 def def
}
do_unicode_token_test3 5.7 \
"tokenchars \u2444\u2445" \
"separators \u05D0\u05D1\u05D2" \
"\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \
[list \
0 \u2444fre\u2445sh \u2444fre\u2445sh \
1 water water \
2 fish fish \
3 \u2445timer \u2445timer \
]
# Check that it is not possible to add a standalone diacritic codepoint
# to either separators or tokenchars.
do_unicode_token_test3 5.8 "separators \u0301" \
"hello\u0301world \u0301helloworld" \
"0 helloworld hello\u0301world 1 helloworld helloworld"
do_unicode_token_test3 5.9 "tokenchars \u0301" \
"hello\u0301world \u0301helloworld" \
"0 helloworld hello\u0301world 1 helloworld helloworld"
do_unicode_token_test3 5.10 "separators \u0301" \
"remove_diacritics 0" \
"hello\u0301world \u0301helloworld" \
"0 hello\u0301world hello\u0301world 1 helloworld helloworld"
do_unicode_token_test3 5.11 "tokenchars \u0301" \
"remove_diacritics 0" \
"hello\u0301world \u0301helloworld" \
"0 hello\u0301world hello\u0301world 1 helloworld helloworld"
#-------------------------------------------------------------------------
proc do_tokenize {tokenizer txt} {
set res [list]
foreach {a b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] {
lappend res $b
}
set res
}
# Argument $lCodepoint must be a list of codepoints (integers) that
# correspond to whitespace characters. This command creates a string
# $W from the codepoints, then tokenizes "${W}hello{$W}world${W}"
# using tokenizer $tokenizer. The test passes if the tokenizer successfully
# extracts the two 5 character tokens.
#
proc do_isspace_test {tn tokenizer lCp} {
set whitespace [format [string repeat %c [llength $lCp]] {*}$lCp]
set txt "${whitespace}hello${whitespace}world${whitespace}"
uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}]
}
set tokenizers [list unicode61]
ifcapable icu { lappend tokenizers icu }
# Some tests to check that the tokenizers can both identify white-space
# codepoints. All codepoints tested below are of type "Zs" in the
# UnicodeData.txt file.
foreach T $tokenizers {
do_isspace_test 6.$T.1 $T 32
do_isspace_test 6.$T.2 $T 160
do_isspace_test 6.$T.3 $T 5760
do_isspace_test 6.$T.4 $T 6158
do_isspace_test 6.$T.5 $T 8192
do_isspace_test 6.$T.6 $T 8193
do_isspace_test 6.$T.7 $T 8194
do_isspace_test 6.$T.8 $T 8195
do_isspace_test 6.$T.9 $T 8196
do_isspace_test 6.$T.10 $T 8197
do_isspace_test 6.$T.11 $T 8198
do_isspace_test 6.$T.12 $T 8199
do_isspace_test 6.$T.13 $T 8200
do_isspace_test 6.$T.14 $T 8201
do_isspace_test 6.$T.15 $T 8202
do_isspace_test 6.$T.16 $T 8239
do_isspace_test 6.$T.17 $T 8287
do_isspace_test 6.$T.18 $T 12288
do_isspace_test 6.$T.19 $T {32 160 5760 6158}
do_isspace_test 6.$T.20 $T {8192 8193 8194 8195}
do_isspace_test 6.$T.21 $T {8196 8197 8198 8199}
do_isspace_test 6.$T.22 $T {8200 8201 8202 8239}
do_isspace_test 6.$T.23 $T {8287 12288}
}
#-------------------------------------------------------------------------
# Test that the private use ranges are treated as alphanumeric.
#
foreach {tn1 c} {
1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff
} {
foreach {tn2 config res} {
1 "" "0 hello*world hello*world"
2 "separators *" "0 hello hello 1 world world"
} {
set config [string map [list * $c] $config]
set input [string map [list * $c] "hello*world"]
set output [string map [list * $c] $res]
do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output
}
}
#-------------------------------------------------------------------------
# Cursory test of remove_diacritics=0.
#
# 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS
# 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS
# 00E4;LATIN SMALL LETTER A WITH DIAERESIS
# 00F6;LATIN SMALL LETTER O WITH DIAERESIS
#
do_execsql_test 8.1.1 "
CREATE VIRTUAL TABLE t3 USING fts5(
content, tokenize='unicode61 remove_diacritics 1'
);
INSERT INTO t3 VALUES('o');
INSERT INTO t3 VALUES('a');
INSERT INTO t3 VALUES('O');
INSERT INTO t3 VALUES('A');
INSERT INTO t3 VALUES('\xD6');
INSERT INTO t3 VALUES('\xC4');
INSERT INTO t3 VALUES('\xF6');
INSERT INTO t3 VALUES('\xE4');
"
do_execsql_test 8.1.2 {
SELECT rowid FROM t3 WHERE t3 MATCH 'o' ORDER BY rowid ASC;
} {1 3 5 7}
do_execsql_test 8.1.3 {
SELECT rowid FROM t3 WHERE t3 MATCH 'a' ORDER BY rowid ASC;
} {2 4 6 8}
do_execsql_test 8.2.1 {
CREATE VIRTUAL TABLE t4 USING fts5(
content, tokenize='unicode61 remove_diacritics 0'
);
INSERT INTO t4 SELECT * FROM t3 ORDER BY rowid ASC;
}
do_execsql_test 8.2.2 {
SELECT rowid FROM t4 WHERE t4 MATCH 'o' ORDER BY rowid ASC;
} {1 3}
do_execsql_test 8.2.3 {
SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;
} {2 4}
#-------------------------------------------------------------------------
#
if 0 {
foreach {tn sql} {
1 {
CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);
CREATE VIRTUAL TABLE t6 USING fts4(
tokenize=unicode61 [tokenchars=="] "tokenchars=[]");
CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]);
}
2 {
CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= .");
CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]");
CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4");
}
3 {
CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .');
CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]');
CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4');
}
4 {
CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`);
CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`);
CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`);
}
} {
do_execsql_test 9.$tn.0 {
DROP TABLE IF EXISTS t5;
DROP TABLE IF EXISTS t5aux;
DROP TABLE IF EXISTS t6;
DROP TABLE IF EXISTS t6aux;
DROP TABLE IF EXISTS t7;
DROP TABLE IF EXISTS t7aux;
}
do_execsql_test 9.$tn.1 $sql
do_execsql_test 9.$tn.2 {
CREATE VIRTUAL TABLE t5aux USING fts4aux(t5);
INSERT INTO t5 VALUES('one two three/four.five.six');
SELECT * FROM t5aux;
} {
four.five.six * 1 1 four.five.six 0 1 1
{one two three} * 1 1 {one two three} 0 1 1
}
do_execsql_test 9.$tn.3 {
CREATE VIRTUAL TABLE t6aux USING fts4aux(t6);
INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta');
SELECT * FROM t6aux;
} {
{alpha=beta"gamma} * 1 1 {alpha=beta"gamma} 0 1 1
{delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1
}
do_execsql_test 9.$tn.4 {
CREATE VIRTUAL TABLE t7aux USING fts4aux(t7);
INSERT INTO t7 VALUES('alephxbeth\xC4gimel');
SELECT * FROM t7aux;
} {
aleph * 1 1 aleph 0 1 1
beth * 1 1 beth 0 1 1
gimel * 1 1 gimel 0 1 1
}
}
# Check that multiple options are handled correctly.
#
do_execsql_test 10.1 {
DROP TABLE IF EXISTS t1;
CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61
"tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy"
"separators=a" "separators=a" "tokenchars=a" "tokenchars=a"
);
INSERT INTO t1 VALUES('oneatwoxthreeyfour');
INSERT INTO t1 VALUES('a.single=word');
CREATE VIRTUAL TABLE t1aux USING fts4aux(t1);
SELECT * FROM t1aux;
} {
.single=word * 1 1 .single=word 0 1 1
four * 1 1 four 0 1 1
one * 1 1 one 0 1 1
three * 1 1 three 0 1 1
two * 1 1 two 0 1 1
}
# Test that case folding happens after tokenization, not before.
#
do_execsql_test 10.2 {
DROP TABLE IF EXISTS t2;
CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB");
INSERT INTO t2 VALUES('oneatwoBthree');
INSERT INTO t2 VALUES('onebtwoAthree');
CREATE VIRTUAL TABLE t2aux USING fts4aux(t2);
SELECT * FROM t2aux;
} {
one * 1 1 one 0 1 1
onebtwoathree * 1 1 onebtwoathree 0 1 1
three * 1 1 three 0 1 1
two * 1 1 two 0 1 1
}
# Test that the tokenchars and separators options work with the
# fts3tokenize table.
#
do_execsql_test 11.1 {
CREATE VIRTUAL TABLE ft1 USING fts3tokenize(
"unicode61", "tokenchars=@.", "separators=1234567890"
);
SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road';
} {
berlin@street sydney.road
}
}
finish_test

View File

@ -81,6 +81,7 @@ LIBOBJ += fts5_hash.o
LIBOBJ += fts5_index.o LIBOBJ += fts5_index.o
LIBOBJ += fts5_storage.o LIBOBJ += fts5_storage.o
LIBOBJ += fts5_tokenize.o LIBOBJ += fts5_tokenize.o
LIBOBJ += fts5_unicode2.o
LIBOBJ += fts5parse.o LIBOBJ += fts5parse.o
@ -616,6 +617,9 @@ fts5_storage.o: $(TOP)/ext/fts5/fts5_storage.c $(HDR) $(EXTHDR)
fts5_tokenize.o: $(TOP)/ext/fts5/fts5_tokenize.c $(HDR) $(EXTHDR) fts5_tokenize.o: $(TOP)/ext/fts5/fts5_tokenize.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_tokenize.c $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_tokenize.c
fts5_unicode2.o: $(TOP)/ext/fts5/fts5_unicode2.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_unicode2.c
fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon
cp $(TOP)/ext/fts5/fts5parse.y . cp $(TOP)/ext/fts5/fts5parse.y .
rm -f fts5parse.h rm -f fts5parse.h

View File

@ -1,5 +1,5 @@
C Move\sall\sfts5\stest\sfiles\sto\snew\sdirectory\s"ext/fts5/test". C Add\sa\sversion\sof\sthe\sunicode61\stokenizer\sto\sfts5.
D 2014-12-29T15:59:36.706 D 2015-01-01T16:46:10.851
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -102,7 +102,7 @@ F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197
F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts3/unicode/mkunicode.tcl 2fa92b916b17ee0fc94129d36969972d463bc016
F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786
F ext/fts5/fts5.c 37e124e24e5860f9842e5f3ee22129a786c0fd74 F ext/fts5/fts5.c 37e124e24e5860f9842e5f3ee22129a786c0fd74
F ext/fts5/fts5.h 4f9d2c477c0ee1907164642471329a82cb6b203b F ext/fts5/fts5.h 4f9d2c477c0ee1907164642471329a82cb6b203b
@ -114,26 +114,29 @@ F ext/fts5/fts5_expr.c 27d3d2deebae277c34ae2bb3d501dd879c442ba5
F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279
F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f
F ext/fts5/fts5_storage.c 13794781977c9a624eb8bd7b9509de241e405853 F ext/fts5/fts5_storage.c 13794781977c9a624eb8bd7b9509de241e405853
F ext/fts5/fts5_tcl.c ce11e46589986b957b89809aabd3936d898d501b F ext/fts5/fts5_tcl.c 664e710e2bbeed505cb91848772ca7538623a67f
F ext/fts5/fts5_tokenize.c 5d6e785345b0d87d174fcc0653bfacd0d9fd7f2e F ext/fts5/fts5_tokenize.c 5a0ad46408d09bcda2bf0addb5af42fdb75ebabb
F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
F ext/fts5/test/fts5aa.test 01fff9cf4e75c33871dd121d6adae33b609542cf w test/fts5aa.test F ext/fts5/test/fts5aa.test 01fff9cf4e75c33871dd121d6adae33b609542cf
F ext/fts5/test/fts5ab.test 7a58a954cae2ae50cef3ee525c57bc8eb3eb50b3 w test/fts5ab.test F ext/fts5/test/fts5ab.test 7a58a954cae2ae50cef3ee525c57bc8eb3eb50b3
F ext/fts5/test/fts5ac.test d3de838f48d2ac8c26386832f6d93a3a3dbb5d4b w test/fts5ac.test F ext/fts5/test/fts5ac.test d3de838f48d2ac8c26386832f6d93a3a3dbb5d4b
F ext/fts5/test/fts5ad.test a8311d6ce46964fa1686937793dd81d284317324 w test/fts5ad.test F ext/fts5/test/fts5ad.test a8311d6ce46964fa1686937793dd81d284317324
F ext/fts5/test/fts5ae.test e576e646013489ce458a5b276caa787035efb175 w test/fts5ae.test F ext/fts5/test/fts5ae.test e576e646013489ce458a5b276caa787035efb175
F ext/fts5/test/fts5af.test 7e4c679bc6337ddcde6a3c9b9d81c81d2f7e77bd w test/fts5af.test F ext/fts5/test/fts5af.test 7e4c679bc6337ddcde6a3c9b9d81c81d2f7e77bd
F ext/fts5/test/fts5ag.test c79ee7707d120b79869fa2ac1538639b9fa1b997 w test/fts5ag.test F ext/fts5/test/fts5ag.test c79ee7707d120b79869fa2ac1538639b9fa1b997
F ext/fts5/test/fts5ah.test e510c741e9833d6335c87bef2e7f93fecfcc7c1d w test/fts5ah.test F ext/fts5/test/fts5ah.test e510c741e9833d6335c87bef2e7f93fecfcc7c1d
F ext/fts5/test/fts5ai.test 6a22f43776e1612591392721b535ca28d2c1a19f w test/fts5ai.test F ext/fts5/test/fts5ai.test 6a22f43776e1612591392721b535ca28d2c1a19f
F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3 w test/fts5aj.test F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3
F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f w test/fts5ak.test F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f
F ext/fts5/test/fts5al.test c055f1d682f931b8ea6c6e6251d90925f2aa55a1 w test/fts5al.test F ext/fts5/test/fts5al.test c055f1d682f931b8ea6c6e6251d90925f2aa55a1
F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05 w ext/fts5/fts5auxdata.test F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05
F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8 w test/fts5ea.test F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8
F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7 w test/fts5fault1.test F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7
F ext/fts5/test/fts5porter.test d8f7591b733bcc1f02ca0dd313bc891a4b289562 w ext/fts5/fts5porter.test F ext/fts5/test/fts5porter.test d8f7591b733bcc1f02ca0dd313bc891a4b289562
F ext/fts5/test/fts5tokenizer.test a1f3128e0d42c93632122c76cbe0d07a901591ca w ext/fts5/fts5tokenizer.test F ext/fts5/test/fts5tokenizer.test a1f3128e0d42c93632122c76cbe0d07a901591ca
F ext/fts5/test/fts5unicode.test b9c7bb982e0ee242a0774e636e1888ca32947a83
F ext/fts5/test/fts5unicode2.test 7b0d64bbb7bfb7b5080e032e068404b42432ee02
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb
F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37
@ -177,7 +180,7 @@ F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x
F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8
F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60
F main.mk 863a6f5cdcc3a47a9dcbedc9af37d3c0d4172935 F main.mk 602303f3596d10237f25da030ee1d96065e2e5a8
F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea
F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5
F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83
@ -1212,7 +1215,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P b33fe0dd89f3180c209fa1f9e75d0a7acab12b8e P 7f148edb30103c5f4fee20cd08e38537f9615bf2
R c65f16b94aeceea9cda28cb8f092d4a9 R d01caf1e8e04bd7c1b6e26fb465c90b6
U dan U dan
Z 822a98c34fd542b912bf890d737a0e9f Z 5c3f4d7bf4502327dfa6eb630b5a26ec

View File

@ -1 +1 @@
7f148edb30103c5f4fee20cd08e38537f9615bf2 d09f7800cf14f73ea86d037107ef80295b2c173a