1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-01 06:27:03 +03:00

Add tests to restore coverage of fts5_tokenizer.c.

FossilOrigin-Name: 8f9257361b05e368bf433e56d0698923b0f97d12e7c0ad7760aaab6746c0e467
This commit is contained in:
dan
2024-08-17 17:22:49 +00:00
parent 342984075b
commit b651084713
8 changed files with 146 additions and 45 deletions

View File

@ -628,6 +628,9 @@ proc print_categories {lMap} {
$caseP
$caseS
$caseZ
default:
return 1;
}
return 0;
}

View File

@ -79,7 +79,7 @@ static int fts5AsciiCreate(
int i;
memset(p, 0, sizeof(AsciiTokenizer));
memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
const char *zArg = azArg[i+1];
if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
fts5AsciiAddExceptions(p, zArg, 1);
@ -90,7 +90,6 @@ static int fts5AsciiCreate(
rc = SQLITE_ERROR;
}
}
if( rc==SQLITE_OK && i<nArg ) rc = SQLITE_ERROR;
if( rc!=SQLITE_OK ){
fts5AsciiDelete((Fts5Tokenizer*)p);
p = 0;
@ -382,7 +381,7 @@ static int fts5UnicodeCreate(
}
/* Search for a "categories" argument */
for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
if( 0==sqlite3_stricmp(azArg[i], "categories") ){
zCat = azArg[i+1];
}
@ -391,7 +390,7 @@ static int fts5UnicodeCreate(
rc = unicodeSetCategories(p, zCat);
}
for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
const char *zArg = azArg[i+1];
if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
@ -416,8 +415,6 @@ static int fts5UnicodeCreate(
rc = SQLITE_ERROR;
}
}
if( i<nArg && rc==SQLITE_OK ) rc = SQLITE_ERROR;
}else{
rc = SQLITE_NOMEM;
}
@ -1294,41 +1291,46 @@ static int fts5TriCreate(
Fts5Tokenizer **ppOut
){
int rc = SQLITE_OK;
TrigramTokenizer *pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew));
TrigramTokenizer *pNew = 0;
UNUSED_PARAM(pUnused);
if( pNew==0 ){
rc = SQLITE_NOMEM;
if( nArg%2 ){
rc = SQLITE_ERROR;
}else{
int i;
pNew->bFold = 1;
pNew->iFoldParam = 0;
for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
const char *zArg = azArg[i+1];
if( 0==sqlite3_stricmp(azArg[i], "case_sensitive") ){
if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
rc = SQLITE_ERROR;
pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew));
if( pNew==0 ){
rc = SQLITE_NOMEM;
}else{
pNew->bFold = 1;
pNew->iFoldParam = 0;
for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
const char *zArg = azArg[i+1];
if( 0==sqlite3_stricmp(azArg[i], "case_sensitive") ){
if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
rc = SQLITE_ERROR;
}else{
pNew->bFold = (zArg[0]=='0');
}
}else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
rc = SQLITE_ERROR;
}else{
pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0;
}
}else{
pNew->bFold = (zArg[0]=='0');
}
}else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
rc = SQLITE_ERROR;
}else{
pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0;
}
}else{
}
if( pNew->iFoldParam!=0 && pNew->bFold==0 ){
rc = SQLITE_ERROR;
}
}
if( i<nArg && rc==SQLITE_OK ) rc = SQLITE_ERROR;
if( pNew->iFoldParam!=0 && pNew->bFold==0 ){
rc = SQLITE_ERROR;
}
if( rc!=SQLITE_OK ){
fts5TriDelete((Fts5Tokenizer*)pNew);
pNew = 0;
if( rc!=SQLITE_OK ){
fts5TriDelete((Fts5Tokenizer*)pNew);
pNew = 0;
}
}
}
*ppOut = (Fts5Tokenizer*)pNew;

View File

@ -364,6 +364,9 @@ int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){
default: return 1; }
break;
default:
return 1;
}
return 0;
}

View File

@ -55,5 +55,22 @@ do_execsql_test 1.5 {
SELECT * FROM t4t
} {สนามกีฬา 1 1}
#-------------------------------------------------------------------------
# Tests 2.*: exercise the unicode61 "categories" option when the category
# list contains tab characters. The SQL is in double-quoted Tcl strings, so
# each \t below is turned into a literal tab before the SQL is executed.
reset_db
# 2.0: a category list made up only of a space and a tab. No error is
# expected (do_execsql_test would fail the test if the statement raised one).
do_execsql_test 2.0 "
CREATE VIRTUAL TABLE x1 USING fts5(c,
tokenize=\"unicode61 categories ' \t'\");
"
# 2.1: "N*", two tabs, then "MYZ". The tokenizer constructor must fail.
# NOTE(review): presumably rejected because "MY" is not a valid M
# sub-category - confirm against sqlite3Fts5UnicodeCatParse().
do_catchsql_test 2.1 "
CREATE VIRTUAL TABLE x2 USING fts5(c,
tokenize=\"unicode61 categories 'N*\t\tMYZ'\");
" {1 {error in tokenizer constructor}}
# 2.2: as 2.1 but with "XYZ". The tokenizer constructor must fail.
# NOTE(review): presumably rejected because "X" is not a known top-level
# category letter - confirm against sqlite3Fts5UnicodeCatParse().
do_catchsql_test 2.2 "
CREATE VIRTUAL TABLE x2 USING fts5(c,
tokenize=\"unicode61 categories 'N*\t\tXYZ'\");
" {1 {error in tokenizer constructor}}
finish_test

View File

@ -286,5 +286,61 @@ do_execsql_test 9.3 {
SELECT rowid FROM t1 WHERE a12 LIKE NULL
} {}
#-------------------------------------------------------------------------
# Tests 10.*: insert text containing unusual code points and malformed
# UTF-8 into a table that uses the trigram tokenizer. Each do_test expects
# an empty result, so these tests only verify that the inserts complete
# without raising an error.
reset_db
do_execsql_test 10.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=trigram);
}
# 10.1: strings with \U escapes of increasing length (FF, FFF, FFFF, FFFFF,
# 10001) placed around "jkl", both after and before the plain word "abc".
# The SQL is brace-quoted, so $val is not expanded by Tcl here.
# NOTE(review): how each \U escape is decoded depends on the Tcl version in
# use - confirm the intended code points.
do_test 10.1 {
foreach {val} {
"abc \UFFjkl\UFF"
"abc \UFFFjkl\UFFF"
"abc \UFFFFjkl\UFFFF"
"abc \UFFFFFjkl\UFFFFF"
"\UFFjkl\UFF abc"
"\UFFFjkl\UFFF abc"
"\UFFFFjkl\UFFFF abc"
"\UFFFFFjkl\UFFFFF abc"
"\U10001jkl\U10001 abc"
} {
execsql { INSERT INTO t1 VALUES( $val ) }
}
} {}
# 10.2: blob literals holding irregular byte sequences. Unlike 10.1 the SQL
# is double-quoted, so Tcl pastes each X'...' literal into the statement
# text. The bytes of interest are:
#   E1 80 00  - three-byte lead byte, one continuation byte, then 0x00
#   ED A0 80  - UTF-8 style encoding of the surrogate U+D800
#   EF BF BE  - encoding of U+FFFE
# 62 63 20 64 65 66 is "bc def"; the second group of four values prefixes
# the same data with 76 68 69 20 ("vhi ").
# NOTE(review): the ED A0 80 value appears twice in each group - confirm
# whether a different sequence was intended for one of them.
do_test 10.2 {
foreach {val} {
X'E18000626320646566'
X'61EDA0806320646566'
X'61EDA0806320646566'
X'61EFBFBE6320646566'
X'76686920E18000626320646566'
X'7668692061EDA0806320646566'
X'7668692061EDA0806320646566'
X'7668692061EFBFBE6320646566'
} {
execsql " INSERT INTO t1 VALUES( $val ) "
}
} {}
# 10.3: byte strings of the form 'a' (0x61), 0xF7, then three to six 0xBF
# bytes, then 'b' (0x62). F7 BF BF BF is a four-byte form that lies beyond
# U+10FFFF; the longer variants add further continuation bytes. Each value
# is inserted on its own and again with the prefix 'abcd'.
do_test 10.3 {
set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
execsql {
INSERT INTO t1 VALUES($a);
INSERT INTO t1 VALUES($b);
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
INSERT INTO t1 VALUES('abcd' || $a);
INSERT INTO t1 VALUES('abcd' || $b);
INSERT INTO t1 VALUES('abcd' || $c);
INSERT INTO t1 VALUES('abcd' || $d);
}
} {}
finish_test

View File

@ -470,4 +470,24 @@ do_execsql_test 8.2.3 {
SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;
} {2 4}
#-------------------------------------------------------------------------
# Tests 9.1.*: check which values the trigram tokenizer accepts for its
# remove_diacritics option. Each row of the table below is:
#   tn   - suffix of the test name
#   val  - value passed as remove_diacritics
#   bErr - 1 if creating the table is expected to fail, 0 otherwise
# The values 0, 1 and 2 are expected to work; 3 and 11 are expected to be
# rejected by the tokenizer constructor.
foreach {tn val bErr} {
1 0 0
2 1 0
3 2 0
4 3 1
5 11 1
} {
reset_db
# Expected do_catchsql_test result for each value of bErr.
set aRes(0) {0 {}}
set aRes(1) {1 {error in tokenizer constructor}}
set res $aRes($bErr)
# The SQL is double-quoted so that $val is substituted into the
# tokenize= argument before the statement is run.
do_catchsql_test 9.1.$tn "
CREATE VIRTUAL TABLE bl USING fts5(
s, tokenize='trigram remove_diacritics $val'
);
" $res
}
finish_test