mirror of
https://github.com/sqlite/sqlite.git
synced 2026-01-06 08:01:16 +03:00
Add a test to verify that the fts3 and fts5 "unicode61" tokenizers are byte for byte compatible.
FossilOrigin-Name: 8ec8314354edc9d6f7d1c8a6370fd984a552a52f
This commit is contained in:
@@ -418,6 +418,7 @@ TESTSRC += \
|
||||
$(TOP)/ext/misc/fuzzer.c \
|
||||
$(TOP)/ext/fts5/fts5_tcl.c \
|
||||
$(TOP)/ext/fts5/fts5_test_mi.c \
|
||||
$(TOP)/ext/fts5/fts5_test_tok.c \
|
||||
$(TOP)/ext/misc/ieee754.c \
|
||||
$(TOP)/ext/misc/nextchar.c \
|
||||
$(TOP)/ext/misc/percentile.c \
|
||||
|
||||
@@ -1087,6 +1087,7 @@ TESTEXT = \
|
||||
$(TOP)\ext\misc\fuzzer.c \
|
||||
$(TOP)\ext\fts5\fts5_tcl.c \
|
||||
$(TOP)\ext\fts5\fts5_test_mi.c \
|
||||
$(TOP)\ext\fts5\fts5_test_tok.c \
|
||||
$(TOP)\ext\misc\ieee754.c \
|
||||
$(TOP)\ext\misc\nextchar.c \
|
||||
$(TOP)\ext\misc\percentile.c \
|
||||
|
||||
@@ -23,7 +23,8 @@
|
||||
#include <assert.h>
|
||||
|
||||
extern int sqlite3_fts5_may_be_corrupt;
|
||||
extern int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *);
|
||||
extern int sqlite3Fts5TestRegisterMatchinfo(sqlite3*);
|
||||
extern int sqlite3Fts5TestRegisterTok(sqlite3*, fts5_api*);
|
||||
|
||||
/*************************************************************************
|
||||
** This is a copy of the first part of the SqliteDb structure in
|
||||
@@ -1078,6 +1079,32 @@ static int f5tRegisterMatchinfo(
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
static int f5tRegisterTok(
|
||||
void * clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
int rc;
|
||||
sqlite3 *db = 0;
|
||||
fts5_api *pApi = 0;
|
||||
|
||||
if( objc!=2 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "DB");
|
||||
return TCL_ERROR;
|
||||
}
|
||||
if( f5tDbAndApi(interp, objv[1], &db, &pApi) ){
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
rc = sqlite3Fts5TestRegisterTok(db, pApi);
|
||||
if( rc!=SQLITE_OK ){
|
||||
Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Entry point.
|
||||
*/
|
||||
@@ -1093,7 +1120,8 @@ int Fts5tcl_Init(Tcl_Interp *interp){
|
||||
{ "sqlite3_fts5_create_function", f5tCreateFunction, 0 },
|
||||
{ "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 },
|
||||
{ "sqlite3_fts5_token_hash", f5tTokenHash, 0 },
|
||||
{ "sqlite3_fts5_register_matchinfo", f5tRegisterMatchinfo, 0 }
|
||||
{ "sqlite3_fts5_register_matchinfo", f5tRegisterMatchinfo, 0 },
|
||||
{ "sqlite3_fts5_register_fts5tokenize", f5tRegisterTok, 0 }
|
||||
};
|
||||
int i;
|
||||
F5tTokenizerContext *pContext;
|
||||
|
||||
482
ext/fts5/fts5_test_tok.c
Normal file
482
ext/fts5/fts5_test_tok.c
Normal file
@@ -0,0 +1,482 @@
|
||||
/*
|
||||
** 2013 Apr 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This file contains code for the "fts5tokenize" virtual table module.
|
||||
** An fts5tokenize virtual table is created as follows:
|
||||
**
|
||||
** CREATE VIRTUAL TABLE <tbl> USING fts5tokenize(
|
||||
** <tokenizer-name>, <arg-1>, ...
|
||||
** );
|
||||
**
|
||||
** The table created has the following schema:
|
||||
**
|
||||
** CREATE TABLE <tbl>(input HIDDEN, token, start, end, position)
|
||||
**
|
||||
** When queried, the query must include a WHERE clause of type:
|
||||
**
|
||||
** input = <string>
|
||||
**
|
||||
** The virtual table module tokenizes this <string>, using the FTS5
|
||||
** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE
|
||||
** statement and returns one row for each token in the result. With
|
||||
** fields set as follows:
|
||||
**
|
||||
** input: Always set to a copy of <string>
|
||||
** token: A token from the input.
|
||||
** start: Byte offset of the token within the input <string>.
|
||||
** end: Byte offset of the byte immediately following the end of the
|
||||
** token within the input string.
|
||||
** position: Token offset of the token within the input.
|
||||
**
|
||||
*/
|
||||
#if defined(SQLITE_TEST) && defined(SQLITE_ENABLE_FTS5)
|
||||
|
||||
#include <fts5.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
typedef struct Fts5tokTable Fts5tokTable;
|
||||
typedef struct Fts5tokCursor Fts5tokCursor;
|
||||
typedef struct Fts5tokRow Fts5tokRow;
|
||||
|
||||
/*
|
||||
** Virtual table structure.
|
||||
*/
|
||||
/* One object of this type exists for each fts5tokenize virtual table. It
** is allocated and zeroed by fts5tokConnectMethod() and released by
** fts5tokDisconnectMethod(). */
struct Fts5tokTable {
|
||||
sqlite3_vtab base; /* Base class used by SQLite core. Must be first */
|
||||
fts5_tokenizer tok; /* Tokenizer methods, populated by xFindTokenizer() */
|
||||
Fts5Tokenizer *pTok; /* Instance from tok.xCreate(); freed by tok.xDelete() */
|
||||
};
|
||||
|
||||
/*
|
||||
** A container for a row's values.
|
||||
*/
|
||||
/* One entry of Fts5tokCursor.aRow[], filled in by fts5tokCb() for each
** token reported by the tokenizer. */
struct Fts5tokRow {
|
||||
char *zToken; /* nul-terminated copy of the token (sqlite3_malloc) */
|
||||
int iStart; /* Start offset, as passed to the tokenizer callback */
|
||||
int iEnd; /* End offset, as passed to the tokenizer callback */
|
||||
int iPos; /* Token position; unchanged for FTS5_TOKEN_COLOCATED tokens */
|
||||
};
|
||||
|
||||
/*
|
||||
** Virtual table cursor structure.
|
||||
*/
|
||||
/* Cursor state. xFilter tokenizes the whole input up front and stores the
** results in aRow[]; xNext then simply steps iRowid through that array. */
struct Fts5tokCursor {
|
||||
sqlite3_vtab_cursor base; /* Base class used by SQLite core. Must be first */
|
||||
int iRowid; /* Current 'rowid' value. Row is aRow[iRowid-1] */
|
||||
char *zInput; /* nul-terminated copy of the input (sqlite3_malloc) */
|
||||
int nRow; /* Number of entries in aRow[]. EOF when iRowid>nRow */
|
||||
Fts5tokRow *aRow; /* Array of rows to return (sqlite3_realloc) */
|
||||
};
|
||||
|
||||
/*
** Dequote the nul-terminated string z[] in place.
**
** If the first character of z[] is one of [, ', " or `, it is treated as
** an opening quote. The text that follows is copied down to the start of
** the buffer until the matching close quote (']' for '[', otherwise the
** same character as the opening quote) or the end of the string is found.
** Two consecutive close-quote characters are an escape and produce a
** single close-quote character in the output. Anything following the
** closing quote is discarded.
**
** If z[] does not begin with a quote character it is left unmodified.
*/
static void fts5tokDequote(char *z){
  char cClose = z[0];             /* Close-quote character */
  char *pRead;                    /* Next input character to consider */
  char *pWrite = z;               /* Where the next output character goes */

  switch( cClose ){
    case '[':
      cClose = ']';
      break;
    case '\'':
    case '"':
    case '`':
      break;
    default:
      /* Not a quoted string. Nothing to do. */
      return;
  }

  pRead = &z[1];
  while( *pRead ){
    if( *pRead!=cClose ){
      /* Ordinary character. */
      *pWrite++ = *pRead++;
    }else if( pRead[1]==cClose ){
      /* An escaped (doubled) quote character. Emit one copy of it. */
      *pWrite++ = cClose;
      pRead += 2;
    }else{
      /* The closing quote. */
      break;
    }
  }

  *pWrite = '\0';
}
|
||||
|
||||
/*
|
||||
** The second argument, argv[], is an array of pointers to nul-terminated
|
||||
** strings. This function makes a copy of the array and strings into a
|
||||
** single block of memory. It then dequotes any of the strings that appear
|
||||
** to be quoted.
|
||||
**
|
||||
** If successful, output parameter *pazDequote is set to point at the
|
||||
** array of dequoted strings and SQLITE_OK is returned. The caller is
|
||||
** responsible for eventually calling sqlite3_free() to free the array
|
||||
** in this case. Or, if an error occurs, an SQLite error code is returned.
|
||||
** The final value of *pazDequote is undefined in this case.
|
||||
*/
|
||||
static int fts5tokDequoteArray(
|
||||
int argc, /* Number of elements in argv[] */
|
||||
const char * const *argv, /* Input array */
|
||||
char ***pazDequote /* Output array */
|
||||
){
|
||||
int rc = SQLITE_OK; /* Return code */
|
||||
if( argc==0 ){
|
||||
*pazDequote = 0;
|
||||
}else{
|
||||
int i;
|
||||
int nByte = 0;
|
||||
char **azDequote;
|
||||
|
||||
for(i=0; i<argc; i++){
|
||||
nByte += (int)(strlen(argv[i]) + 1);
|
||||
}
|
||||
|
||||
*pazDequote = azDequote = sqlite3_malloc(sizeof(char *)*argc + nByte);
|
||||
if( azDequote==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}else{
|
||||
char *pSpace = (char *)&azDequote[argc];
|
||||
for(i=0; i<argc; i++){
|
||||
int n = (int)strlen(argv[i]);
|
||||
azDequote[i] = pSpace;
|
||||
memcpy(pSpace, argv[i], n+1);
|
||||
fts5tokDequote(pSpace);
|
||||
pSpace += (n+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Schema of the tokenizer table.
|
||||
*/
|
||||
#define FTS3_TOK_SCHEMA "CREATE TABLE x(input HIDDEN, token, start, end, position)"
|
||||
|
||||
/*
|
||||
** This function does all the work for both the xConnect and xCreate methods.
|
||||
** These tables have no persistent representation of their own, so xConnect
|
||||
** and xCreate are identical operations.
|
||||
**
|
||||
** argv[0]: module name
|
||||
** argv[1]: database name
|
||||
** argv[2]: table name
|
||||
** argv[3]: first argument (tokenizer name)
|
||||
*/
|
||||
static int fts5tokConnectMethod(
|
||||
sqlite3 *db, /* Database connection */
|
||||
void *pCtx, /* Pointer to fts5_api object */
|
||||
int argc, /* Number of elements in argv array */
|
||||
const char * const *argv, /* xCreate/xConnect argument array */
|
||||
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
|
||||
char **pzErr /* OUT: sqlite3_malloc'd error message */
|
||||
){
|
||||
fts5_api *pApi = (fts5_api*)pCtx;
|
||||
Fts5tokTable *pTab = 0;
|
||||
int rc;
|
||||
char **azDequote = 0;
|
||||
int nDequote;
|
||||
|
||||
rc = sqlite3_declare_vtab(db,
|
||||
"CREATE TABLE x(input HIDDEN, token, start, end, position)"
|
||||
);
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
nDequote = argc-3;
|
||||
rc = fts5tokDequoteArray(nDequote, &argv[3], &azDequote);
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
pTab = (Fts5tokTable*)sqlite3_malloc(sizeof(Fts5tokTable));
|
||||
if( pTab==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}else{
|
||||
memset(pTab, 0, sizeof(Fts5tokTable));
|
||||
}
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
void *pTokCtx = 0;
|
||||
const char *zModule = 0;
|
||||
if( nDequote>0 ){
|
||||
zModule = azDequote[0];
|
||||
}
|
||||
|
||||
rc = pApi->xFindTokenizer(pApi, zModule, &pTokCtx, &pTab->tok);
|
||||
if( rc==SQLITE_OK ){
|
||||
const char **azArg = (const char **)&azDequote[1];
|
||||
int nArg = nDequote>0 ? nDequote-1 : 0;
|
||||
rc = pTab->tok.xCreate(pTokCtx, azArg, nArg, &pTab->pTok);
|
||||
}
|
||||
}
|
||||
|
||||
if( rc!=SQLITE_OK ){
|
||||
sqlite3_free(pTab);
|
||||
pTab = 0;
|
||||
}
|
||||
|
||||
*ppVtab = (sqlite3_vtab*)pTab;
|
||||
sqlite3_free(azDequote);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** This function does the work for both the xDisconnect and xDestroy methods.
|
||||
** These tables have no persistent representation of their own, so xDisconnect
|
||||
** and xDestroy are identical operations.
|
||||
*/
|
||||
static int fts5tokDisconnectMethod(sqlite3_vtab *pVtab){
|
||||
Fts5tokTable *pTab = (Fts5tokTable *)pVtab;
|
||||
if( pTab->pTok ){
|
||||
pTab->tok.xDelete(pTab->pTok);
|
||||
}
|
||||
sqlite3_free(pTab);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xBestIndex - Analyze a WHERE and ORDER BY clause.
|
||||
*/
|
||||
static int fts5tokBestIndexMethod(
|
||||
sqlite3_vtab *pVTab,
|
||||
sqlite3_index_info *pInfo
|
||||
){
|
||||
int i;
|
||||
|
||||
for(i=0; i<pInfo->nConstraint; i++){
|
||||
if( pInfo->aConstraint[i].usable
|
||||
&& pInfo->aConstraint[i].iColumn==0
|
||||
&& pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ
|
||||
){
|
||||
pInfo->idxNum = 1;
|
||||
pInfo->aConstraintUsage[i].argvIndex = 1;
|
||||
pInfo->aConstraintUsage[i].omit = 1;
|
||||
pInfo->estimatedCost = 1;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
|
||||
pInfo->idxNum = 0;
|
||||
assert( pInfo->estimatedCost>1000000.0 );
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xOpen - Open a cursor.
|
||||
*/
|
||||
static int fts5tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
|
||||
Fts5tokCursor *pCsr;
|
||||
|
||||
pCsr = (Fts5tokCursor *)sqlite3_malloc(sizeof(Fts5tokCursor));
|
||||
if( pCsr==0 ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(pCsr, 0, sizeof(Fts5tokCursor));
|
||||
|
||||
*ppCsr = (sqlite3_vtab_cursor *)pCsr;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Reset the tokenizer cursor passed as the only argument. As if it had
|
||||
** just been returned by fts5tokOpenMethod().
|
||||
*/
|
||||
static void fts5tokResetCursor(Fts5tokCursor *pCsr){
|
||||
int i;
|
||||
for(i=0; i<pCsr->nRow; i++){
|
||||
sqlite3_free(pCsr->aRow[i].zToken);
|
||||
}
|
||||
sqlite3_free(pCsr->zInput);
|
||||
sqlite3_free(pCsr->aRow);
|
||||
pCsr->zInput = 0;
|
||||
pCsr->aRow = 0;
|
||||
pCsr->nRow = 0;
|
||||
pCsr->iRowid = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** xClose - Close a cursor.
|
||||
*/
|
||||
static int fts5tokCloseMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts5tokCursor *pCsr = (Fts5tokCursor *)pCursor;
|
||||
fts5tokResetCursor(pCsr);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xNext - Advance the cursor to the next row, if any.
|
||||
*/
|
||||
static int fts5tokNextMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts5tokCursor *pCsr = (Fts5tokCursor *)pCursor;
|
||||
pCsr->iRowid++;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int fts5tokCb(
|
||||
void *pCtx, /* Pointer to Fts5tokCursor */
|
||||
int tflags, /* Mask of FTS5_TOKEN_* flags */
|
||||
const char *pToken, /* Pointer to buffer containing token */
|
||||
int nToken, /* Size of token in bytes */
|
||||
int iStart, /* Byte offset of token within input text */
|
||||
int iEnd /* Byte offset of end of token within input text */
|
||||
){
|
||||
Fts5tokCursor *pCsr = (Fts5tokCursor*)pCtx;
|
||||
Fts5tokRow *pRow;
|
||||
|
||||
if( (pCsr->nRow & (pCsr->nRow-1))==0 ){
|
||||
int nNew = pCsr->nRow ? pCsr->nRow*2 : 32;
|
||||
Fts5tokRow *aNew;
|
||||
aNew = (Fts5tokRow*)sqlite3_realloc(pCsr->aRow, nNew*sizeof(Fts5tokRow));
|
||||
if( aNew==0 ) return SQLITE_NOMEM;
|
||||
memset(&aNew[pCsr->nRow], 0, sizeof(Fts5tokRow)*(nNew-pCsr->nRow));
|
||||
pCsr->aRow = aNew;
|
||||
}
|
||||
|
||||
pRow = &pCsr->aRow[pCsr->nRow];
|
||||
pRow->iStart = iStart;
|
||||
pRow->iEnd = iEnd;
|
||||
if( pCsr->nRow ){
|
||||
pRow->iPos = pRow[-1].iPos + ((tflags & FTS5_TOKEN_COLOCATED) ? 0 : 1);
|
||||
}
|
||||
pRow->zToken = sqlite3_malloc(nToken+1);
|
||||
if( pRow->zToken==0 ) return SQLITE_NOMEM;
|
||||
memcpy(pRow->zToken, pToken, nToken);
|
||||
pRow->zToken[nToken] = 0;
|
||||
pCsr->nRow++;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xFilter - Initialize a cursor to point at the start of its data.
|
||||
*/
|
||||
static int fts5tokFilterMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
|
||||
int idxNum, /* Strategy index */
|
||||
const char *idxStr, /* Unused */
|
||||
int nVal, /* Number of elements in apVal */
|
||||
sqlite3_value **apVal /* Arguments for the indexing scheme */
|
||||
){
|
||||
int rc = SQLITE_ERROR;
|
||||
Fts5tokCursor *pCsr = (Fts5tokCursor *)pCursor;
|
||||
Fts5tokTable *pTab = (Fts5tokTable *)(pCursor->pVtab);
|
||||
|
||||
fts5tokResetCursor(pCsr);
|
||||
if( idxNum==1 ){
|
||||
const char *zByte = (const char *)sqlite3_value_text(apVal[0]);
|
||||
int nByte = sqlite3_value_bytes(apVal[0]);
|
||||
pCsr->zInput = sqlite3_malloc(nByte+1);
|
||||
if( pCsr->zInput==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}else{
|
||||
memcpy(pCsr->zInput, zByte, nByte);
|
||||
pCsr->zInput[nByte] = 0;
|
||||
rc = pTab->tok.xTokenize(
|
||||
pTab->pTok, (void*)pCsr, 0, zByte, nByte, fts5tokCb
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
return fts5tokNextMethod(pCursor);
|
||||
}
|
||||
|
||||
/*
|
||||
** xEof - Return true if the cursor is at EOF, or false otherwise.
|
||||
*/
|
||||
static int fts5tokEofMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts5tokCursor *pCsr = (Fts5tokCursor *)pCursor;
|
||||
return (pCsr->iRowid>pCsr->nRow);
|
||||
}
|
||||
|
||||
/*
|
||||
** xColumn - Return a column value.
|
||||
*/
|
||||
static int fts5tokColumnMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
|
||||
int iCol /* Index of column to read value from */
|
||||
){
|
||||
Fts5tokCursor *pCsr = (Fts5tokCursor *)pCursor;
|
||||
Fts5tokRow *pRow = &pCsr->aRow[pCsr->iRowid-1];
|
||||
|
||||
/* CREATE TABLE x(input, token, start, end, position) */
|
||||
switch( iCol ){
|
||||
case 0:
|
||||
sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT);
|
||||
break;
|
||||
case 1:
|
||||
sqlite3_result_text(pCtx, pRow->zToken, -1, SQLITE_TRANSIENT);
|
||||
break;
|
||||
case 2:
|
||||
sqlite3_result_int(pCtx, pRow->iStart);
|
||||
break;
|
||||
case 3:
|
||||
sqlite3_result_int(pCtx, pRow->iEnd);
|
||||
break;
|
||||
default:
|
||||
assert( iCol==4 );
|
||||
sqlite3_result_int(pCtx, pRow->iPos);
|
||||
break;
|
||||
}
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xRowid - Return the current rowid for the cursor.
|
||||
*/
|
||||
static int fts5tokRowidMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite_int64 *pRowid /* OUT: Rowid value */
|
||||
){
|
||||
Fts5tokCursor *pCsr = (Fts5tokCursor *)pCursor;
|
||||
*pRowid = (sqlite3_int64)pCsr->iRowid;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Register the fts5tok module with database connection db. Return SQLITE_OK
|
||||
** if successful or an error code if sqlite3_create_module() fails.
|
||||
*/
|
||||
int sqlite3Fts5TestRegisterTok(sqlite3 *db, fts5_api *pApi){
|
||||
static const sqlite3_module fts5tok_module = {
|
||||
0, /* iVersion */
|
||||
fts5tokConnectMethod, /* xCreate */
|
||||
fts5tokConnectMethod, /* xConnect */
|
||||
fts5tokBestIndexMethod, /* xBestIndex */
|
||||
fts5tokDisconnectMethod, /* xDisconnect */
|
||||
fts5tokDisconnectMethod, /* xDestroy */
|
||||
fts5tokOpenMethod, /* xOpen */
|
||||
fts5tokCloseMethod, /* xClose */
|
||||
fts5tokFilterMethod, /* xFilter */
|
||||
fts5tokNextMethod, /* xNext */
|
||||
fts5tokEofMethod, /* xEof */
|
||||
fts5tokColumnMethod, /* xColumn */
|
||||
fts5tokRowidMethod, /* xRowid */
|
||||
0, /* xUpdate */
|
||||
0, /* xBegin */
|
||||
0, /* xSync */
|
||||
0, /* xCommit */
|
||||
0, /* xRollback */
|
||||
0, /* xFindFunction */
|
||||
0, /* xRename */
|
||||
0, /* xSavepoint */
|
||||
0, /* xRelease */
|
||||
0 /* xRollbackTo */
|
||||
};
|
||||
int rc; /* Return code */
|
||||
|
||||
rc = sqlite3_create_module(db, "fts5tokenize", &fts5tok_module, (void*)pApi);
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif /* defined(SQLITE_TEST) && defined(SQLITE_ENABLE_FTS5) */
|
||||
115
ext/fts5/test/fts5tok1.test
Normal file
115
ext/fts5/test/fts5tok1.test
Normal file
@@ -0,0 +1,115 @@
|
||||
# 2016 Jan 15
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#*************************************************************************
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] fts5_common.tcl]
|
||||
ifcapable !fts5 { finish_test ; return }
|
||||
set ::testprefix fts5tok1
|
||||
|
||||
|
||||
sqlite3_fts5_register_fts5tokenize db
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# Simple test cases. Using the default (ascii) tokenizer.
|
||||
#
|
||||
do_execsql_test 1.0 {
|
||||
CREATE VIRTUAL TABLE t1 USING fts5tokenize(ascii);
|
||||
CREATE VIRTUAL TABLE t2 USING fts5tokenize();
|
||||
CREATE VIRTUAL TABLE t3 USING fts5tokenize(
|
||||
ascii, 'separators', 'xyz', tokenchars, ''''
|
||||
);
|
||||
}
|
||||
|
||||
foreach {tn tbl} {1 t1 2 t2 3 t3} {
|
||||
do_execsql_test 1.$tn.1 "SELECT input, * FROM $tbl ('one two three')" {
|
||||
{one two three} one 0 3 0
|
||||
{one two three} two 4 7 1
|
||||
{one two three} three 8 13 2
|
||||
}
|
||||
|
||||
do_execsql_test 1.$tn.2 "
|
||||
SELECT token FROM $tbl WHERE input = 'OnE tWo tHrEe'
|
||||
" {
|
||||
one two three
|
||||
}
|
||||
}
|
||||
|
||||
do_execsql_test 1.4 {
|
||||
SELECT token FROM t3 WHERE input = '1x2x3x'
|
||||
} {1 2 3}
|
||||
|
||||
do_execsql_test 1.5 {
|
||||
SELECT token FROM t1 WHERE input = '1x2x3x'
|
||||
} {1x2x3x}
|
||||
|
||||
do_execsql_test 1.6 {
|
||||
SELECT token FROM t3 WHERE input = '1''2x3x'
|
||||
} {1'2 3}
|
||||
|
||||
do_execsql_test 1.7 {
|
||||
SELECT token FROM t3 WHERE input = ''
|
||||
} {}
|
||||
|
||||
do_execsql_test 1.8 {
|
||||
SELECT token FROM t3 WHERE input = NULL
|
||||
} {}
|
||||
|
||||
do_execsql_test 1.9 {
|
||||
SELECT input, * FROM t3 WHERE input = 123
|
||||
} {123 123 0 3 0}
|
||||
|
||||
do_execsql_test 1.10 {
|
||||
SELECT input, * FROM t1 WHERE input = 'a b c' AND token = 'b';
|
||||
} {
|
||||
{a b c} b 2 3 1
|
||||
}
|
||||
|
||||
do_execsql_test 1.11 {
|
||||
SELECT input, * FROM t1 WHERE token = 'b' AND input = 'a b c';
|
||||
} {
|
||||
{a b c} b 2 3 1
|
||||
}
|
||||
|
||||
do_execsql_test 1.12 {
|
||||
SELECT input, * FROM t1 WHERE input < 'b' AND input = 'a b c';
|
||||
} {
|
||||
{a b c} a 0 1 0
|
||||
{a b c} b 2 3 1
|
||||
{a b c} c 4 5 2
|
||||
}
|
||||
|
||||
do_execsql_test 1.13.1 {
|
||||
CREATE TABLE c1(x);
|
||||
INSERT INTO c1(x) VALUES('a b c');
|
||||
INSERT INTO c1(x) VALUES('d e f');
|
||||
}
|
||||
do_execsql_test 1.13.2 {
|
||||
SELECT c1.*, input, t1.* FROM c1, t1 WHERE input = x AND c1.rowid=t1.rowid;
|
||||
} {
|
||||
{a b c} {a b c} a 0 1 0
|
||||
{d e f} {d e f} e 2 3 1
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# Error cases.
|
||||
#
|
||||
do_catchsql_test 2.0 {
|
||||
CREATE VIRTUAL TABLE tX USING fts5tokenize(nosuchtokenizer);
|
||||
} {1 {vtable constructor failed: tX}}
|
||||
|
||||
do_catchsql_test 2.1 {
|
||||
CREATE VIRTUAL TABLE t4 USING fts5tokenize;
|
||||
SELECT * FROM t4;
|
||||
} {1 {SQL logic error or missing database}}
|
||||
|
||||
|
||||
finish_test
|
||||
47
ext/fts5/test/fts5tok2.test
Normal file
47
ext/fts5/test/fts5tok2.test
Normal file
@@ -0,0 +1,47 @@
|
||||
# 2016 Jan 15
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#*************************************************************************
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] fts5_common.tcl]
|
||||
ifcapable !fts5||!fts3 { finish_test ; return }
|
||||
set ::testprefix fts5tok2
|
||||
|
||||
sqlite3_fts5_register_fts5tokenize db
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# Check that the fts3 and fts5 "unicode61" tokenizers return identical results for inputs containing each codepoint from 1 to 65535.
|
||||
#
|
||||
do_execsql_test 1.0 {
|
||||
CREATE VIRTUAL TABLE t5 USING fts5tokenize(unicode61);
|
||||
CREATE VIRTUAL TABLE t3 USING fts3tokenize(unicode61);
|
||||
}
|
||||
|
||||
do_test 1.1 {
|
||||
array unset -nocomplain A
|
||||
|
||||
for {set i 1} {$i < 65536} {incr i} {
|
||||
set input [format "abc%cxyz" $i]
|
||||
set expect [execsql {
|
||||
SELECT input, token, start, end FROM t3 WHERE input=$input
|
||||
}]
|
||||
|
||||
incr A([llength $expect])
|
||||
|
||||
set res [execsql {
|
||||
SELECT input, token, start, end FROM t5($input)
|
||||
}]
|
||||
if {$res != $expect} {error "failed at i=$i"}
|
||||
}
|
||||
} {}
|
||||
|
||||
do_test 1.1.nTokenChars=$A(4).nSeparators=$A(8) {} {}
|
||||
|
||||
finish_test
|
||||
3
main.mk
3
main.mk
@@ -333,7 +333,8 @@ TESTSRC += \
|
||||
$(TOP)/ext/misc/wholenumber.c \
|
||||
$(TOP)/ext/misc/vfslog.c \
|
||||
$(TOP)/ext/fts5/fts5_tcl.c \
|
||||
$(TOP)/ext/fts5/fts5_test_mi.c
|
||||
$(TOP)/ext/fts5/fts5_test_mi.c \
|
||||
$(TOP)/ext/fts5/fts5_test_tok.c
|
||||
|
||||
|
||||
#TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c
|
||||
|
||||
23
manifest
23
manifest
@@ -1,8 +1,8 @@
|
||||
C Simplification\sto\sthe\sISO8610\sparser\sin\sthe\simnplementation\sof\sdate/time\nfunctions.
|
||||
D 2016-01-14T19:32:46.777
|
||||
F Makefile.in cfa1ac03c4b414992fd53f24d978b45b0c21de55
|
||||
C Add\sa\stest\sto\sverify\sthat\sthe\sfts3\sand\sfts5\s"unicode61"\stokenizers\sare\sbyte\sfor\sbyte\scompatible.
|
||||
D 2016-01-14T20:05:59.293
|
||||
F Makefile.in a476545d0c8626224d0bacac85c6e2967474af81
|
||||
F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434
|
||||
F Makefile.msc 2d8b2ad5a03315940bcb9e64145ab70850d66b4d
|
||||
F Makefile.msc 01e855f958932d0d3ed62ec675fc63e2cef61fcb
|
||||
F README.md 8ecc12493ff9f820cdea6520a9016001cb2e59b7
|
||||
F VERSION 866588d1edf0ccb5b0d33896974338f97564f719
|
||||
F aclocal.m4 a5c22d164aff7ed549d53a90fa56d56955281f50
|
||||
@@ -106,8 +106,9 @@ F ext/fts5/fts5_hash.c 1b113977296cf4212c6ec667d5e3f2bd18036955
|
||||
F ext/fts5/fts5_index.c aa798d3a839847fd351b3d0f49520f190e57c2e3
|
||||
F ext/fts5/fts5_main.c 488ceecdb4400ecc6a3d3b2247cedef153955388
|
||||
F ext/fts5/fts5_storage.c f7b2d330dd7b29a9f4da09f6d85879ca8c41b2e8
|
||||
F ext/fts5/fts5_tcl.c 18e9382d8cdad4c05b49559c68494968b9b4a4fb
|
||||
F ext/fts5/fts5_tcl.c bcacc05dec0446e7b1a44d5d906057e677bd7ea4
|
||||
F ext/fts5/fts5_test_mi.c 1ec66ffdf7632077fbd773b7a6df5153272ec070
|
||||
F ext/fts5/fts5_test_tok.c db08af63673c3a7d39f053b36fd6e065017706be
|
||||
F ext/fts5/fts5_tokenize.c 504984ac6993323247221eebe3cd55bead01b5f8
|
||||
F ext/fts5/fts5_unicode2.c 78273fbd588d1d9bd0a7e4e0ccc9207348bae33c
|
||||
F ext/fts5/fts5_varint.c 3f86ce09cab152e3d45490d7586b7ed2e40c13f1
|
||||
@@ -174,6 +175,8 @@ F ext/fts5/test/fts5simple.test 2bc6451cbe887a9215f5b14ae307c70d850344c9
|
||||
F ext/fts5/test/fts5simple2.test 843f1f7fe439ff32bf74f4fd6430632f9636ef3a
|
||||
F ext/fts5/test/fts5synonym.test cf88c0a56d5ea9591e3939ef1f6e294f7f2d0671
|
||||
F ext/fts5/test/fts5synonym2.test d2d9099d9d105b55ea03fd52d61ae2847d534129
|
||||
F ext/fts5/test/fts5tok1.test beb894c6f3468f10a574302f69ebe4436b0287c7
|
||||
F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2
|
||||
F ext/fts5/test/fts5tokenizer.test ea4df698b35cc427ebf2ba22829d0e28386d8c89
|
||||
F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841
|
||||
F ext/fts5/test/fts5unicode2.test c1dd890ba32b7609adba78e420faa847abe43b59
|
||||
@@ -262,7 +265,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e
|
||||
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x
|
||||
F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8
|
||||
F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60
|
||||
F main.mk 20e46c09ac8732a74a9e2d55e8d90257d97eb844
|
||||
F main.mk 55f0940264e55540773214ee1f2dbba0bf359957
|
||||
F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83
|
||||
F mptest/config01.test 3c6adcbc50b991866855f1977ff172eb6d901271
|
||||
F mptest/config02.test 4415dfe36c48785f751e16e32c20b077c28ae504
|
||||
@@ -1412,7 +1415,7 @@ F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f
|
||||
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
|
||||
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
|
||||
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
|
||||
P e2cba1bbfdcb24e35b2275e29071d8a4e4943417
|
||||
R 79fbdde13a17c7cb82657d609276debb
|
||||
U drh
|
||||
Z 2e5da468d29690cad1fa7e36751b6984
|
||||
P b9159f42a517a95ae52464c96431708c00b7bb36
|
||||
R e09ac1cbb3100668573769c5e2d7569a
|
||||
U dan
|
||||
Z 6d6590c289b1d7ff01c17cc7ff7964e6
|
||||
|
||||
@@ -1 +1 @@
|
||||
b9159f42a517a95ae52464c96431708c00b7bb36
|
||||
8ec8314354edc9d6f7d1c8a6370fd984a552a52f
|
||||
Reference in New Issue
Block a user