mirror of
https://github.com/sqlite/sqlite.git
synced 2025-07-30 19:03:16 +03:00
Add the "unindexed" column option to fts5.
FossilOrigin-Name: 86309961344f4076ddcf55d730d3600ec3b6e45c
This commit is contained in:
@ -92,6 +92,7 @@ struct Fts5Config {
|
||||
char *zName; /* Name of FTS index */
|
||||
int nCol; /* Number of columns */
|
||||
char **azCol; /* Column names */
|
||||
u8 *abUnindexed; /* True for unindexed columns */
|
||||
int nPrefix; /* Number of prefix indexes */
|
||||
int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */
|
||||
int eContent; /* An FTS5_CONTENT value */
|
||||
|
@ -134,31 +134,50 @@ static const char *fts5ConfigSkipLiteral(const char *pIn){
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
** The first character of the string pointed to by argument z is guaranteed
** to be an open-quote character (see function fts5_isopenquote()).
**
** This function searches for the corresponding close-quote character within
** the string and, if found, dequotes the string in place and adds a new
** nul-terminator byte. Two consecutive close-quote characters inside the
** quoted text are treated as an escape and collapse to a single one.
**
** If the close-quote is found, the value returned is the byte offset of
** the character immediately following it. Or, if the close-quote is not
** found, -1 is returned. If -1 is returned, the buffer is left in an
** undefined state.
*/
static int fts5Dequote(char *z){
  char q;                         /* Close-quote character to search for */
  int iIn = 1;                    /* Read cursor (starts after open-quote) */
  int iOut = 0;                   /* Write cursor for the dequoted text */
  q = z[0];

  /* Set stack variable q to the close-quote character */
  assert( q=='[' || q=='\'' || q=='"' || q=='`' );
  if( q=='[' ) q = ']';

  while( z[iIn] ){
    if( z[iIn]==q ){
      if( z[iIn+1]!=q ){
        /* Character iIn was the close quote. */
        z[iOut] = '\0';
        return iIn+1;
      }else{
        /* Character iIn and iIn+1 form an escaped quote character. Skip
        ** the input cursor past both and copy a single quote character
        ** to the output buffer. */
        iIn += 2;
        z[iOut++] = q;
      }
    }else{
      z[iOut++] = z[iIn++];
    }
  }

  /* Did not find the close-quote character. Return -1. */
  z[iOut] = '\0';
  return -1;
}
|
||||
|
||||
/*
|
||||
@ -184,18 +203,6 @@ void sqlite3Fts5Dequote(char *z){
|
||||
}
|
||||
}
|
||||
|
||||
/*
** Strip white-space from both ends of nul-terminated string z.
**
** Trailing white-space is removed in place by overwriting it with nul
** bytes. Leading white-space is not moved; instead the value returned is
** a pointer to the first non-white-space character within buffer z.
*/
static char *fts5TrimString(char *z){
  char *zStart = z;
  int nLen = strlen(z);

  /* Walk backwards from the end, zeroing each trailing white-space byte */
  for(; nLen>0 && fts5_iswhitespace(z[nLen-1]); nLen--){
    z[nLen-1] = '\0';
  }

  /* Advance past any leading white-space */
  while( fts5_iswhitespace(*zStart) ){
    zStart++;
  }
  return zStart;
}
|
||||
|
||||
/*
|
||||
** Duplicate the string passed as the only argument into a buffer allocated
|
||||
** by sqlite3_malloc().
|
||||
@ -251,10 +258,10 @@ static int fts5ConfigParseSpecial(
|
||||
Fts5Global *pGlobal,
|
||||
Fts5Config *pConfig, /* Configuration object to update */
|
||||
const char *zCmd, /* Special command to parse */
|
||||
int nCmd, /* Size of zCmd in bytes */
|
||||
const char *zArg, /* Argument to parse */
|
||||
char **pzErr /* OUT: Error message */
|
||||
){
|
||||
int nCmd = strlen(zCmd);
|
||||
if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){
|
||||
const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES;
|
||||
int rc = SQLITE_OK;
|
||||
@ -384,6 +391,84 @@ static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){
|
||||
);
|
||||
}
|
||||
|
||||
/*
|
||||
** Gobble up the first bareword or quoted word from the input buffer zIn.
|
||||
** Return a pointer to the character immediately following the last in
|
||||
** the gobbled word if successful, or a NULL pointer otherwise (failed
|
||||
** to find close-quote character).
|
||||
**
|
||||
** Before returning, set pzOut to point to a new buffer containing a
|
||||
** nul-terminated, dequoted copy of the gobbled word. If the word was
|
||||
** quoted, *pbQuoted is also set to 1 before returning.
|
||||
**
|
||||
** If *pRc is other than SQLITE_OK when this function is called, it is
|
||||
** a no-op (NULL is returned). Otherwise, if an OOM occurs within this
|
||||
** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not*
|
||||
** set if a parse error (failed to find close quote) occurs.
|
||||
*/
|
||||
static const char *fts5ConfigGobbleWord(
|
||||
int *pRc,
|
||||
const char *zIn,
|
||||
char **pzOut,
|
||||
int *pbQuoted
|
||||
){
|
||||
const char *zRet = 0;
|
||||
*pbQuoted = 0;
|
||||
*pzOut = 0;
|
||||
|
||||
if( *pRc==SQLITE_OK ){
|
||||
int nIn = strlen(zIn);
|
||||
char *zOut = sqlite3_malloc(nIn+1);
|
||||
|
||||
if( zOut==0 ){
|
||||
*pRc = SQLITE_NOMEM;
|
||||
}else{
|
||||
memcpy(zOut, zIn, nIn+1);
|
||||
if( fts5_isopenquote(zOut[0]) ){
|
||||
int ii = fts5Dequote(zOut);
|
||||
if( ii>0 ) zRet = &zIn[ii];
|
||||
*pbQuoted = 1;
|
||||
}else{
|
||||
zRet = fts5ConfigSkipBareword(zIn);
|
||||
zOut[zRet-zIn] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
if( zRet==0 ){
|
||||
sqlite3_free(zOut);
|
||||
}else{
|
||||
*pzOut = zOut;
|
||||
}
|
||||
}
|
||||
|
||||
return zRet;
|
||||
}
|
||||
|
||||
static int fts5ConfigParseColumn(
|
||||
Fts5Config *p,
|
||||
char *zCol,
|
||||
char *zArg,
|
||||
char **pzErr
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME)
|
||||
|| 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME)
|
||||
){
|
||||
*pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
|
||||
rc = SQLITE_ERROR;
|
||||
}else if( zArg ){
|
||||
if( 0==sqlite3_stricmp(zArg, "unindexed") ){
|
||||
p->abUnindexed[p->nCol] = 1;
|
||||
}else{
|
||||
*pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg);
|
||||
rc = SQLITE_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
p->azCol[p->nCol++] = zCol;
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Arguments nArg/azArg contain the string arguments passed to the xCreate
|
||||
** or xConnect method of the virtual table. This function attempts to
|
||||
@ -407,6 +492,7 @@ int sqlite3Fts5ConfigParse(
|
||||
int rc = SQLITE_OK; /* Return code */
|
||||
Fts5Config *pRet; /* New object to return */
|
||||
int i;
|
||||
int nByte;
|
||||
|
||||
*ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
|
||||
if( pRet==0 ) return SQLITE_NOMEM;
|
||||
@ -414,7 +500,9 @@ int sqlite3Fts5ConfigParse(
|
||||
pRet->db = db;
|
||||
pRet->iCookie = -1;
|
||||
|
||||
pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
|
||||
nByte = nArg * (sizeof(char*) + sizeof(u8));
|
||||
pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
|
||||
pRet->abUnindexed = (u8*)&pRet->azCol[nArg];
|
||||
pRet->zDb = fts5Strdup(&rc, azArg[1]);
|
||||
pRet->zName = fts5Strdup(&rc, azArg[2]);
|
||||
if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
|
||||
@ -423,63 +511,48 @@ int sqlite3Fts5ConfigParse(
|
||||
}
|
||||
|
||||
for(i=3; rc==SQLITE_OK && i<nArg; i++){
|
||||
char *zDup = fts5Strdup(&rc, azArg[i]);
|
||||
if( zDup ){
|
||||
char *zCol = 0;
|
||||
int bParseError = 0;
|
||||
const char *zOrig = azArg[i];
|
||||
const char *z;
|
||||
char *zOne = 0;
|
||||
char *zTwo = 0;
|
||||
int bOption = 0;
|
||||
int bMustBeCol = 0;
|
||||
|
||||
/* Check if this is a quoted column name */
|
||||
if( fts5_isopenquote(zDup[0]) ){
|
||||
bParseError = fts5Dequote(zDup);
|
||||
zCol = zDup;
|
||||
}else{
|
||||
char *z = (char*)fts5ConfigSkipBareword(zDup);
|
||||
if( *z=='\0' ){
|
||||
zCol = zDup;
|
||||
}else{
|
||||
int nCmd = z - zDup;
|
||||
z = (char*)fts5ConfigSkipWhitespace(z);
|
||||
if( *z!='=' ){
|
||||
bParseError = 1;
|
||||
}else{
|
||||
z++;
|
||||
z = fts5TrimString(z);
|
||||
if( fts5_isopenquote(*z) ){
|
||||
if( fts5Dequote(z) ) bParseError = 1;
|
||||
}else{
|
||||
char *z2 = (char*)fts5ConfigSkipBareword(z);
|
||||
if( *z2 ) bParseError = 1;
|
||||
}
|
||||
if( bParseError==0 ){
|
||||
rc = fts5ConfigParseSpecial(pGlobal, pRet, zDup, nCmd, z, pzErr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( bParseError ){
|
||||
assert( *pzErr==0 );
|
||||
*pzErr = sqlite3_mprintf("parse error in \"%s\"", zDup);
|
||||
rc = SQLITE_ERROR;
|
||||
}else if( zCol ){
|
||||
if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME)
|
||||
|| 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME)
|
||||
){
|
||||
*pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
|
||||
rc = SQLITE_ERROR;
|
||||
}else{
|
||||
pRet->azCol[pRet->nCol++] = zCol;
|
||||
zDup = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sqlite3_free(zDup);
|
||||
z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
|
||||
z = fts5ConfigSkipWhitespace(z);
|
||||
if( z && *z=='=' ){
|
||||
bOption = 1;
|
||||
z++;
|
||||
if( bMustBeCol ) z = 0;
|
||||
}
|
||||
z = fts5ConfigSkipWhitespace(z);
|
||||
if( z && z[0] ){
|
||||
int bDummy;
|
||||
z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
|
||||
if( z && z[0] ) z = 0;
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
if( z==0 ){
|
||||
*pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig);
|
||||
rc = SQLITE_ERROR;
|
||||
}else{
|
||||
if( bOption ){
|
||||
rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo, pzErr);
|
||||
}else{
|
||||
rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr);
|
||||
zOne = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sqlite3_free(zOne);
|
||||
sqlite3_free(zTwo);
|
||||
}
|
||||
|
||||
/* If a tokenizer= option was successfully parsed, the tokenizer has
|
||||
** already been allocated. Otherwise, allocate an instance of the default
|
||||
** tokenizer (simple) now. */
|
||||
** tokenizer (unicode61) now. */
|
||||
if( rc==SQLITE_OK && pRet->pTok==0 ){
|
||||
rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
|
||||
}
|
||||
|
@ -1815,15 +1815,13 @@ static void fts5SegIterNext(
|
||||
int bDummy;
|
||||
i64 iDelta;
|
||||
|
||||
if( p->rc==SQLITE_OK ){
|
||||
pIter->iRowidOffset--;
|
||||
pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
|
||||
iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
|
||||
iOff += nPos;
|
||||
getVarint(&a[iOff], (u64*)&iDelta);
|
||||
pIter->iRowid -= iDelta;
|
||||
fts5SegIterLoadNPos(p, pIter);
|
||||
}
|
||||
pIter->iRowidOffset--;
|
||||
pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
|
||||
iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
|
||||
iOff += nPos;
|
||||
getVarint(&a[iOff], (u64*)&iDelta);
|
||||
pIter->iRowid -= iDelta;
|
||||
fts5SegIterLoadNPos(p, pIter);
|
||||
}else{
|
||||
fts5SegIterReverseNewPage(p, pIter);
|
||||
}
|
||||
|
@ -323,6 +323,7 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){
|
||||
ctx.iCol = -1;
|
||||
rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
|
||||
for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
|
||||
if( pConfig->abUnindexed[iCol-1] ) continue;
|
||||
ctx.szCol = 0;
|
||||
rc = sqlite3Fts5Tokenize(pConfig,
|
||||
(const char*)sqlite3_column_text(pSeek, iCol),
|
||||
@ -486,6 +487,7 @@ int sqlite3Fts5StorageSpecialDelete(
|
||||
|
||||
rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
|
||||
for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){
|
||||
if( pConfig->abUnindexed[iCol] ) continue;
|
||||
ctx.szCol = 0;
|
||||
rc = sqlite3Fts5Tokenize(pConfig,
|
||||
(const char*)sqlite3_value_text(apVal[iCol]),
|
||||
@ -564,12 +566,14 @@ int sqlite3Fts5StorageRebuild(Fts5Storage *p){
|
||||
rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid);
|
||||
for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
|
||||
ctx.szCol = 0;
|
||||
rc = sqlite3Fts5Tokenize(pConfig,
|
||||
(const char*)sqlite3_column_text(pScan, ctx.iCol+1),
|
||||
sqlite3_column_bytes(pScan, ctx.iCol+1),
|
||||
(void*)&ctx,
|
||||
fts5StorageInsertCallback
|
||||
);
|
||||
if( pConfig->abUnindexed[ctx.iCol]==0 ){
|
||||
rc = sqlite3Fts5Tokenize(pConfig,
|
||||
(const char*)sqlite3_column_text(pScan, ctx.iCol+1),
|
||||
sqlite3_column_bytes(pScan, ctx.iCol+1),
|
||||
(void*)&ctx,
|
||||
fts5StorageInsertCallback
|
||||
);
|
||||
}
|
||||
sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
|
||||
p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
|
||||
}
|
||||
@ -671,12 +675,14 @@ int sqlite3Fts5StorageInsert(
|
||||
}
|
||||
for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
|
||||
ctx.szCol = 0;
|
||||
rc = sqlite3Fts5Tokenize(pConfig,
|
||||
(const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
|
||||
sqlite3_value_bytes(apVal[ctx.iCol+2]),
|
||||
(void*)&ctx,
|
||||
fts5StorageInsertCallback
|
||||
);
|
||||
if( pConfig->abUnindexed[ctx.iCol]==0 ){
|
||||
rc = sqlite3Fts5Tokenize(pConfig,
|
||||
(const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
|
||||
sqlite3_value_bytes(apVal[ctx.iCol+2]),
|
||||
(void*)&ctx,
|
||||
fts5StorageInsertCallback
|
||||
);
|
||||
}
|
||||
sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
|
||||
p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
|
||||
}
|
||||
@ -783,6 +789,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
|
||||
ctx.szCol = 0;
|
||||
rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
|
||||
for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
|
||||
if( pConfig->abUnindexed[i] ) continue;
|
||||
ctx.iCol = i;
|
||||
ctx.szCol = 0;
|
||||
rc = sqlite3Fts5Tokenize(
|
||||
|
@ -70,7 +70,7 @@ do_catchsql_test 4.1 {
|
||||
} {1 {parse error in "tokenize = tcl abc"}}
|
||||
do_catchsql_test 4.2 {
|
||||
CREATE VIRTUAL TABLE ft2 USING fts5(x y)
|
||||
} {1 {parse error in "x y"}}
|
||||
} {1 {unrecognized column option: y}}
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# Test the "separators" and "tokenchars" options a bit.
|
||||
|
73
ext/fts5/test/fts5unindexed.test
Normal file
73
ext/fts5/test/fts5unindexed.test
Normal file
@ -0,0 +1,73 @@
|
||||
# 2015 Apr 24
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#***********************************************************************
|
||||
#
|
||||
# The tests in this file focus on "unindexed" columns.
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] fts5_common.tcl]
|
||||
set testprefix fts5unindexed
|
||||
|
||||
|
||||
do_execsql_test 1.1 {
|
||||
CREATE VIRTUAL TABLE t1 USING fts5(a, b UNINDEXED);
|
||||
INSERT INTO t1 VALUES('a b c', 'd e f');
|
||||
INSERT INTO t1 VALUES('g h i', 'j k l');
|
||||
} {}
|
||||
|
||||
do_execsql_test 1.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1}
|
||||
do_execsql_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {}
|
||||
|
||||
do_execsql_test 1.4 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
|
||||
do_execsql_test 1.5 { INSERT INTO t1(t1) VALUES('rebuild') } {}
|
||||
do_execsql_test 1.6 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
|
||||
|
||||
do_execsql_test 1.7 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1}
|
||||
do_execsql_test 1.8 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {}
|
||||
|
||||
do_execsql_test 1.9 { DELETE FROM t1 WHERE t1 MATCH 'b' } {}
|
||||
|
||||
do_execsql_test 1.10 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
|
||||
do_execsql_test 1.11 { INSERT INTO t1(t1) VALUES('rebuild') } {}
|
||||
do_execsql_test 1.12 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
|
||||
|
||||
do_execsql_test 1.13 { SELECT rowid FROM t1 WHERE t1 MATCH 'i' } {2}
|
||||
do_execsql_test 1.14 { SELECT rowid FROM t1 WHERE t1 MATCH 'l' } {}
|
||||
|
||||
do_execsql_test 2.1 {
|
||||
CREATE VIRTUAL TABLE t2 USING fts5(a UNINDEXED, b UNINDEXED);
|
||||
INSERT INTO t1 VALUES('a b c', 'd e f');
|
||||
INSERT INTO t1 VALUES('g h i', 'j k l');
|
||||
SELECT rowid FROM t2_data;
|
||||
} {1 10}
|
||||
do_execsql_test 2.2 {
|
||||
INSERT INTO t2(t2) VALUES('rebuild');
|
||||
INSERT INTO t2(t2) VALUES('integrity-check');
|
||||
SELECT rowid FROM t2_data;
|
||||
} {1 10}
|
||||
|
||||
do_execsql_test 3.1 {
|
||||
CREATE TABLE x4(i INTEGER PRIMARY KEY, a, b, c);
|
||||
CREATE VIRTUAL TABLE t4 USING fts5(a, b UNINDEXED, c, content=x4);
|
||||
INSERT INTO x4 VALUES(10, 'a b c', 'd e f', 'g h i');
|
||||
INSERT INTO x4 VALUES(20, 'j k l', 'm n o', 'p q r');
|
||||
INSERT INTO t4(t4) VALUES('rebuild');
|
||||
INSERT INTO t4(t4) VALUES('integrity-check');
|
||||
} {}
|
||||
|
||||
do_execsql_test 3.2 {
|
||||
INSERT INTO t4(t4, rowid, a, b, c) VALUES('delete', 20, 'j k l', '', 'p q r');
|
||||
DELETE FROM x4 WHERE rowid=20;
|
||||
INSERT INTO t4(t4) VALUES('integrity-check');
|
||||
} {}
|
||||
|
||||
|
||||
finish_test
|
||||
|
Reference in New Issue
Block a user