From 7fcb214b9312710de1fb5050e7fdd632fff387f0 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 1 Mar 2012 19:44:20 +0000 Subject: [PATCH 1/6] Add the "languageid=" option to fts4. This code is still largely untested and alsmost certainly buggy. FossilOrigin-Name: bea257f70f10dd1111d79cabd1e1462dc651704d --- ext/fts3/fts3.c | 116 +++++++++++++++++++++++-------- ext/fts3/fts3Int.h | 25 ++++--- ext/fts3/fts3_aux.c | 2 +- ext/fts3/fts3_term.c | 2 +- ext/fts3/fts3_write.c | 155 ++++++++++++++++++++++++++++++++---------- manifest | 24 ++++--- manifest.uuid | 2 +- test/fts4langid.test | 78 +++++++++++++++++++++ 8 files changed, 318 insertions(+), 86 deletions(-) create mode 100644 test/fts4langid.test diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index d17809fe54..6732edaece 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -469,6 +469,7 @@ static int fts3DisconnectMethod(sqlite3_vtab *pVtab){ sqlite3_free(p->zReadExprlist); sqlite3_free(p->zWriteExprlist); sqlite3_free(p->zContentTbl); + sqlite3_free(p->zLanguageid); /* Invoke the tokenizer destructor to free the tokenizer. */ p->pTokenizer->pModule->xDestroy(p->pTokenizer); @@ -545,7 +546,9 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){ int rc; /* Return code */ char *zSql; /* SQL statement passed to declare_vtab() */ char *zCols; /* List of user defined columns */ + const char *zLanguageid; + zLanguageid = (p->zLanguageid ? 
p->zLanguageid : "__langid"); sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); /* Create a list of user columns for the virtual table */ @@ -556,7 +559,8 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){ /* Create the whole "CREATE TABLE" statement to pass to SQLite */ zSql = sqlite3_mprintf( - "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName + "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)", + zCols, p->zName, zLanguageid ); if( !zCols || !zSql ){ rc = SQLITE_NOMEM; @@ -585,6 +589,7 @@ static int fts3CreateTables(Fts3Table *p){ sqlite3 *db = p->db; /* The database connection */ if( p->zContentTbl==0 ){ + const char *zLanguageid = p->zLanguageid; char *zContentCols; /* Columns of %_content table */ /* Create a list of user columns for the content table */ @@ -593,6 +598,9 @@ static int fts3CreateTables(Fts3Table *p){ char *z = p->azColumn[i]; zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z); } + if( zLanguageid && zContentCols ){ + zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid); + } if( zContentCols==0 ) rc = SQLITE_NOMEM; /* Create the content table */ @@ -792,6 +800,7 @@ static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){ for(i=0; inColumn; i++){ fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]); } + if( p->zLanguageid ) fts3Appendf(pRc, &zRet, ",langid"); sqlite3_free(zFree); }else{ fts3Appendf(pRc, &zRet, "rowid"); @@ -799,7 +808,7 @@ static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){ fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]); } } - fts3Appendf(pRc, &zRet, "FROM '%q'.'%q%s' AS x", + fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", p->zDb, (p->zContentTbl ? p->zContentTbl : p->zName), (p->zContentTbl ? 
"" : "_content") @@ -842,6 +851,9 @@ static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){ for(i=0; inColumn; i++){ fts3Appendf(pRc, &zRet, ",%s(?)", zFunction); } + if( p->zLanguageid ){ + fts3Appendf(pRc, &zRet, ", ?"); + } sqlite3_free(zFree); return zRet; } @@ -1057,6 +1069,7 @@ static int fts3InitVtab( char *zCompress = 0; /* compress=? parameter (or NULL) */ char *zUncompress = 0; /* uncompress=? parameter (or NULL) */ char *zContent = 0; /* content=? parameter (or NULL) */ + char *zLanguageid = 0; /* languageid=? parameter (or NULL) */ assert( strlen(argv[0])==4 ); assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4) @@ -1106,7 +1119,8 @@ static int fts3InitVtab( { "compress", 8 }, /* 2 -> COMPRESS */ { "uncompress", 10 }, /* 3 -> UNCOMPRESS */ { "order", 5 }, /* 4 -> ORDER */ - { "content", 7 } /* 5 -> CONTENT */ + { "content", 7 }, /* 5 -> CONTENT */ + { "languageid", 10 } /* 6 -> LANGUAGEID */ }; int iOpt; @@ -1160,12 +1174,18 @@ static int fts3InitVtab( bDescIdx = (zVal[0]=='d' || zVal[0]=='D'); break; - default: /* CONTENT */ - assert( iOpt==5 ); - sqlite3_free(zUncompress); + case 5: /* CONTENT */ + sqlite3_free(zContent); zContent = zVal; zVal = 0; break; + + case 6: /* LANGUAGEID */ + assert( iOpt==6 ); + sqlite3_free(zLanguageid); + zLanguageid = zVal; + zVal = 0; + break; } } sqlite3_free(zVal); @@ -1243,7 +1263,9 @@ static int fts3InitVtab( p->bHasStat = isFts4; p->bDescIdx = bDescIdx; p->zContentTbl = zContent; + p->zLanguageid = zLanguageid; zContent = 0; + zLanguageid = 0; TESTONLY( p->inTransaction = -1 ); TESTONLY( p->mxSavepoint = -1 ); @@ -1306,6 +1328,7 @@ fts3_init_out: sqlite3_free(zCompress); sqlite3_free(zUncompress); sqlite3_free(zContent); + sqlite3_free(zLanguageid); sqlite3_free((void *)aCol); if( rc!=SQLITE_OK ){ if( p ){ @@ -1357,6 +1380,7 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ Fts3Table *p = (Fts3Table *)pVTab; int i; /* Iterator variable */ int iCons = -1; 
/* Index of constraint to use */ + int iLangidCons = -1; /* Index of langid=x constraint, if present */ /* By default use a full table scan. This is an expensive option, ** so search through the constraints to see if a more efficient @@ -1369,7 +1393,8 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ if( pCons->usable==0 ) continue; /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */ - if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ + if( iCons<0 + && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 ) ){ pInfo->idxNum = FTS3_DOCID_SEARCH; @@ -1392,7 +1417,13 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn; pInfo->estimatedCost = 2.0; iCons = i; - break; + } + + /* Equality constraint on the langid column */ + if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ + && pCons->iColumn==p->nColumn + 2 + ){ + iLangidCons = i; } } @@ -1400,6 +1431,9 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ pInfo->aConstraintUsage[iCons].argvIndex = 1; pInfo->aConstraintUsage[iCons].omit = 1; } + if( iLangidCons>=0 ){ + pInfo->aConstraintUsage[iLangidCons].argvIndex = 2; + } /* Regardless of the strategy selected, FTS can deliver rows in rowid (or ** docid) order. Both ascending and descending are possible. 
@@ -2549,6 +2583,7 @@ static int fts3SegReaderCursorAppend( */ static int fts3SegReaderCursor( Fts3Table *p, /* FTS3 table handle */ + int iLangid, /* Language id */ int iIndex, /* Index to search (from 0 to p->nIndex-1) */ int iLevel, /* Level of segments to scan */ const char *zTerm, /* Term to query for */ @@ -2577,7 +2612,7 @@ static int fts3SegReaderCursor( if( iLevel!=FTS3_SEGCURSOR_PENDING ){ if( rc==SQLITE_OK ){ - rc = sqlite3Fts3AllSegdirs(p, iIndex, iLevel, &pStmt); + rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt); } while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ @@ -2622,6 +2657,7 @@ static int fts3SegReaderCursor( */ int sqlite3Fts3SegReaderCursor( Fts3Table *p, /* FTS3 table handle */ + int iLangid, int iIndex, /* Index to search (from 0 to p->nIndex-1) */ int iLevel, /* Level of segments to scan */ const char *zTerm, /* Term to query for */ @@ -2646,7 +2682,7 @@ int sqlite3Fts3SegReaderCursor( memset(pCsr, 0, sizeof(Fts3MultiSegReader)); return fts3SegReaderCursor( - p, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr + p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr ); } @@ -2658,11 +2694,14 @@ int sqlite3Fts3SegReaderCursor( */ static int fts3SegReaderCursorAddZero( Fts3Table *p, /* FTS virtual table handle */ + int iLangid, const char *zTerm, /* Term to scan doclist of */ int nTerm, /* Number of bytes in zTerm */ Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */ ){ - return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr); + return fts3SegReaderCursor(p, + iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr + ); } /* @@ -2698,8 +2737,9 @@ static int fts3TermSegReaderCursor( for(i=1; bFound==0 && inIndex; i++){ if( p->aIndex[i].nPrefix==nTerm ){ bFound = 1; - rc = sqlite3Fts3SegReaderCursor( - p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr); + rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, + i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr + ); 
pSegcsr->bLookup = 1; } } @@ -2707,19 +2747,21 @@ static int fts3TermSegReaderCursor( for(i=1; bFound==0 && inIndex; i++){ if( p->aIndex[i].nPrefix==nTerm+1 ){ bFound = 1; - rc = sqlite3Fts3SegReaderCursor( - p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr + rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, + i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr ); if( rc==SQLITE_OK ){ - rc = fts3SegReaderCursorAddZero(p, zTerm, nTerm, pSegcsr); + rc = fts3SegReaderCursorAddZero( + p, pCsr->iLangid, zTerm, nTerm, pSegcsr + ); } } } } if( bFound==0 ){ - rc = sqlite3Fts3SegReaderCursor( - p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr + rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, + 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr ); pSegcsr->bLookup = !isPrefix; } @@ -2874,7 +2916,7 @@ static int fts3FilterMethod( UNUSED_PARAMETER(nVal); assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); - assert( nVal==0 || nVal==1 ); + assert( nVal==0 || nVal==1 || nVal==2 ); assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) ); assert( p->pSegments==0 ); @@ -2910,6 +2952,9 @@ static int fts3FilterMethod( return rc; } + pCsr->iLangid = 0; + if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]); + rc = sqlite3Fts3ReadLock(p); if( rc!=SQLITE_OK ) return rc; @@ -2971,10 +3016,17 @@ static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ /* ** This is the xColumn method, called by SQLite to request a value from ** the row that the supplied cursor currently points to. +** +** If: +** +** (iCol < p->nColumn) -> The value of the iCol'th user column. +** (iCol == p->nColumn) -> Magic column with the same name as the table. 
+** (iCol == p->nColumn+1) -> Docid column +** (iCol == p->nColumn+2) -> Langid column */ static int fts3ColumnMethod( sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ - sqlite3_context *pContext, /* Context for sqlite3_result_xxx() calls */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ int iCol /* Index of column to read value from */ ){ int rc = SQLITE_OK; /* Return Code */ @@ -2982,22 +3034,32 @@ static int fts3ColumnMethod( Fts3Table *p = (Fts3Table *)pCursor->pVtab; /* The column value supplied by SQLite must be in range. */ - assert( iCol>=0 && iCol<=p->nColumn+1 ); + assert( iCol>=0 && iCol<=p->nColumn+2 ); if( iCol==p->nColumn+1 ){ /* This call is a request for the "docid" column. Since "docid" is an ** alias for "rowid", use the xRowid() method to obtain the value. */ - sqlite3_result_int64(pContext, pCsr->iPrevId); + sqlite3_result_int64(pCtx, pCsr->iPrevId); }else if( iCol==p->nColumn ){ /* The extra column whose name is the same as the table. - ** Return a blob which is a pointer to the cursor. - */ - sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); + ** Return a blob which is a pointer to the cursor. */ + sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); }else{ + /* The requested column is either a user column (one that contains + ** indexed data), or the language-id column. 
*/ rc = fts3CursorSeek(0, pCsr); - if( rc==SQLITE_OK && sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){ - sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1)); + + if( rc==SQLITE_OK ){ + if( iCol==p->nColumn+2 ){ + int iLangid = 0; + if( p->zLanguageid ){ + iLangid = sqlite3_column_int(pCsr->pStmt, p->nColumn+1); + } + sqlite3_result_int(pCtx, iLangid); + }else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){ + sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); + } } } diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index 16c3de0578..8e889181ca 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -192,6 +192,7 @@ struct Fts3Table { char **azColumn; /* column names. malloced */ sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ char *zContentTbl; /* content=xxx option, or NULL */ + char *zLanguageid; /* languageid=xxx option, or NULL */ /* Precompiled statements used by the implementation. Each of these ** statements is run and reset within a single virtual table API call. @@ -211,12 +212,12 @@ struct Fts3Table { /* TODO: Fix the first paragraph of this comment. ** - ** The following hash table is used to buffer pending index updates during - ** transactions. Variable nPendingData estimates the memory size of the - ** pending data, including hash table overhead, but not malloc overhead. - ** When nPendingData exceeds nMaxPendingData, the buffer is flushed - ** automatically. Variable iPrevDocid is the docid of the most recently - ** inserted record. + ** The following array of hash tables is used to buffer pending index + ** updates during transactions. Variable nPendingData estimates the memory + ** size of the pending data, including hash table overhead, not including + ** malloc overhead. When nPendingData exceeds nMaxPendingData, the buffer + ** is flushed automatically. Variable iPrevDocid is the docid of the most + ** recently inserted record. 
** ** A single FTS4 table may have multiple full-text indexes. For each index ** there is an entry in the aIndex[] array. Index 0 is an index of all the @@ -231,12 +232,13 @@ struct Fts3Table { int nMaxPendingData; /* Max pending data before flush to disk */ int nPendingData; /* Current bytes of pending data */ sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */ + int iPrevLangid; /* Langid of recently inserted document */ #if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) /* State variables used for validating that the transaction control ** methods of the virtual table are called at appropriate times. These - ** values do not contribution to the FTS computation; they are used for - ** verifying the SQLite core. + ** values do not contribute to FTS functionality; they are used for + ** verifying the operation of the SQLite core. */ int inTransaction; /* True after xBegin but before xCommit/xRollback */ int mxSavepoint; /* Largest valid xSavepoint integer */ @@ -255,6 +257,7 @@ struct Fts3Cursor { u8 isRequireSeek; /* True if must seek pStmt to %_content row */ sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ Fts3Expr *pExpr; /* Parsed MATCH query string */ + int iLangid; /* Language being queried for */ int nPhrase; /* Number of matchable phrases in query */ Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ @@ -406,7 +409,7 @@ int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64, int sqlite3Fts3SegReaderPending( Fts3Table*,int,const char*,int,int,Fts3SegReader**); void sqlite3Fts3SegReaderFree(Fts3SegReader *); -int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, sqlite3_stmt **); +int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **); int sqlite3Fts3ReadLock(Fts3Table *); int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*); @@ -427,8 +430,8 @@ int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, 
Fts3SegFilter*); int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *); void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *); -int sqlite3Fts3SegReaderCursor( - Fts3Table *, int, int, const char *, int, int, int, Fts3MultiSegReader *); +int sqlite3Fts3SegReaderCursor(Fts3Table *, + int, int, int, const char *, int, int, int, Fts3MultiSegReader *); /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 diff --git a/ext/fts3/fts3_aux.c b/ext/fts3/fts3_aux.c index ada85d796b..89bf3ebf6e 100644 --- a/ext/fts3/fts3_aux.c +++ b/ext/fts3/fts3_aux.c @@ -376,7 +376,7 @@ static int fts3auxFilterMethod( if( pCsr->zStop==0 ) return SQLITE_NOMEM; } - rc = sqlite3Fts3SegReaderCursor(pFts3, 0, FTS3_SEGCURSOR_ALL, + rc = sqlite3Fts3SegReaderCursor(pFts3, 0, 0, FTS3_SEGCURSOR_ALL, pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr ); if( rc==SQLITE_OK ){ diff --git a/ext/fts3/fts3_term.c b/ext/fts3/fts3_term.c index d3eb690bdd..2108fc1251 100644 --- a/ext/fts3/fts3_term.c +++ b/ext/fts3/fts3_term.c @@ -271,7 +271,7 @@ static int fts3termFilterMethod( pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; pCsr->filter.flags |= FTS3_SEGMENT_SCAN; - rc = sqlite3Fts3SegReaderCursor(pFts3, p->iIndex, FTS3_SEGCURSOR_ALL, + rc = sqlite3Fts3SegReaderCursor(pFts3, 0, p->iIndex, FTS3_SEGCURSOR_ALL, pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr ); if( rc==SQLITE_OK ){ diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index 6d7c3e8d0d..ef2436531c 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -431,6 +431,19 @@ int sqlite3Fts3ReadLock(Fts3Table *p){ return rc; } +static sqlite3_int64 getAbsoluteLevel( + Fts3Table *p, + int iLangid, + int iIndex, + int iLevel +){ + assert( iLangid>=0 ); + assert( p->nIndex>0 ); + assert( iIndex>=0 && iIndexnIndex ); + return (iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL + iLevel; +} + + /* ** Set *ppStmt to a 
statement handle that may be used to iterate through ** all rows in the %_segdir table, from oldest to newest. If successful, @@ -450,6 +463,7 @@ int sqlite3Fts3ReadLock(Fts3Table *p){ */ int sqlite3Fts3AllSegdirs( Fts3Table *p, /* FTS3 table */ + int iLangid, /* Language being queried */ int iIndex, /* Index for p->aIndex[] */ int iLevel, /* Level to select */ sqlite3_stmt **ppStmt /* OUT: Compiled statement */ @@ -465,8 +479,10 @@ int sqlite3Fts3AllSegdirs( /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */ rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0); if( rc==SQLITE_OK ){ - sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL); - sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL-1); + sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); + sqlite3_bind_int(pStmt, 2, + getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) + ); } }else{ /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */ @@ -714,18 +730,28 @@ static int fts3PendingTermsAdd( ** fts3PendingTermsAdd() are to add term/position-list pairs for the ** contents of the document with docid iDocid. */ -static int fts3PendingTermsDocid(Fts3Table *p, sqlite_int64 iDocid){ +static int fts3PendingTermsDocid( + Fts3Table *p, /* Full-text table handle */ + int iLangid, /* Language id of row being written */ + sqlite_int64 iDocid /* Docid of row being written */ +){ + assert( iLangid>=0 ); + /* TODO(shess) Explore whether partially flushing the buffer on ** forced-flush would provide better performance. I suspect that if ** we ordered the doclists by size and flushed the largest until the ** buffer was half empty, that would let the less frequent terms ** generate longer doclists. 
*/ - if( iDocid<=p->iPrevDocid || p->nPendingData>p->nMaxPendingData ){ + if( iDocid<=p->iPrevDocid + || p->iPrevLangid!=iLangid + || p->nPendingData>p->nMaxPendingData + ){ int rc = sqlite3Fts3PendingTermsFlush(p); if( rc!=SQLITE_OK ) return rc; } p->iPrevDocid = iDocid; + p->iPrevLangid = iLangid; return SQLITE_OK; } @@ -779,6 +805,7 @@ static int fts3InsertTerms(Fts3Table *p, sqlite3_value **apVal, u32 *aSz){ ** apVal[p->nColumn+1] Right-most user-defined column ** apVal[p->nColumn+2] Hidden column with same name as table ** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid) +** apVal[p->nColumn+4] Hidden languageid column */ static int fts3InsertData( Fts3Table *p, /* Full-text table */ @@ -809,9 +836,13 @@ static int fts3InsertData( ** defined columns in the FTS3 table, plus one for the docid field. */ rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]); - if( rc!=SQLITE_OK ){ - return rc; + if( rc==SQLITE_OK && p->zLanguageid ){ + rc = sqlite3_bind_int( + pContentInsert, p->nColumn+2, + sqlite3_value_int(apVal[p->nColumn+4]) + ); } + if( rc!=SQLITE_OK ) return rc; /* There is a quirk here. The users INSERT statement may have specified ** a value for the "rowid" field, for the "docid" field, or for both. @@ -871,6 +902,15 @@ static int fts3DeleteAll(Fts3Table *p, int bContent){ return rc; } +/* +** +*/ +static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){ + int iLangid = 0; + if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1); + return iLangid; +} + /* ** The first element in the apVal[] array is assumed to contain the docid ** (an integer) of a row about to be deleted. 
Remove all terms from the @@ -890,16 +930,20 @@ static void fts3DeleteTerms( if( rc==SQLITE_OK ){ if( SQLITE_ROW==sqlite3_step(pSelect) ){ int i; - for(i=1; i<=p->nColumn; i++){ + rc = fts3PendingTermsDocid(p, + langidFromSelect(p, pSelect), + sqlite3_column_int64(pSelect, 0) + ); + for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){ const char *zText = (const char *)sqlite3_column_text(pSelect, i); rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]); - if( rc!=SQLITE_OK ){ - sqlite3_reset(pSelect); - *pRC = rc; - return; - } aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); } + if( rc!=SQLITE_OK ){ + sqlite3_reset(pSelect); + *pRC = rc; + return; + } } rc = sqlite3_reset(pSelect); }else{ @@ -912,7 +956,7 @@ static void fts3DeleteTerms( ** Forward declaration to account for the circular dependency between ** functions fts3SegmentMerge() and fts3AllocateSegdirIdx(). */ -static int fts3SegmentMerge(Fts3Table *, int, int); +static int fts3SegmentMerge(Fts3Table *, int, int, int); /* ** This function allocates a new level iLevel index in the segdir table. @@ -931,6 +975,7 @@ static int fts3SegmentMerge(Fts3Table *, int, int); */ static int fts3AllocateSegdirIdx( Fts3Table *p, + int iLangid, /* Language id */ int iIndex, /* Index for p->aIndex */ int iLevel, int *piIdx @@ -939,10 +984,15 @@ static int fts3AllocateSegdirIdx( sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */ int iNext = 0; /* Result of query pNextIdx */ + assert( iLangid>=0 ); + assert( p->nIndex>=1 ); + /* Set variable iNext to the next available segdir index at level iLevel. 
*/ rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0); if( rc==SQLITE_OK ){ - sqlite3_bind_int(pNextIdx, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel); + sqlite3_bind_int64( + pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) + ); if( SQLITE_ROW==sqlite3_step(pNextIdx) ){ iNext = sqlite3_column_int(pNextIdx, 0); } @@ -956,7 +1006,7 @@ static int fts3AllocateSegdirIdx( ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext. */ if( iNext>=FTS3_MERGE_COUNT ){ - rc = fts3SegmentMerge(p, iIndex, iLevel); + rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel); *piIdx = 0; }else{ *piIdx = iNext; @@ -2216,6 +2266,7 @@ static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){ */ static int fts3DeleteSegdir( Fts3Table *p, /* Virtual table handle */ + int iLangid, /* Language id */ int iIndex, /* Index for p->aIndex */ int iLevel, /* Level of %_segdir entries to delete */ Fts3SegReader **apSegment, /* Array of SegReader objects */ @@ -2243,13 +2294,15 @@ static int fts3DeleteSegdir( if( iLevel==FTS3_SEGCURSOR_ALL ){ rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0); if( rc==SQLITE_OK ){ - sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL); - sqlite3_bind_int(pDelete, 2, (iIndex+1) * FTS3_SEGDIR_MAXLEVEL - 1); + sqlite3_bind_int(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); + sqlite3_bind_int(pDelete, 2, + getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) + ); } }else{ rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0); if( rc==SQLITE_OK ){ - sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel); + sqlite3_bind_int(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel)); } } @@ -2718,13 +2771,18 @@ void sqlite3Fts3SegReaderFinish( ** Otherwise, if successful, SQLITE_OK is returned. If an error occurs, ** an SQLite error code is returned. 
*/ -static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){ +static int fts3SegmentMerge( + Fts3Table *p, + int iLangid, /* Language id to merge */ + int iIndex, /* Index in p->aIndex[] to merge */ + int iLevel /* Level to merge */ +){ int rc; /* Return code */ int iIdx = 0; /* Index of new segment */ int iNewLevel = 0; /* Level/index to create new segment at */ SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ Fts3SegFilter filter; /* Segment term filter condition */ - Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ + Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ int bIgnoreEmpty = 0; /* True to ignore empty segments */ assert( iLevel==FTS3_SEGCURSOR_ALL @@ -2734,7 +2792,7 @@ static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){ assert( iLevel=0 && iIndexnIndex ); - rc = sqlite3Fts3SegReaderCursor(p, iIndex, iLevel, 0, 0, 1, 0, &csr); + rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr); if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; if( iLevel==FTS3_SEGCURSOR_ALL ){ @@ -2750,20 +2808,20 @@ static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){ bIgnoreEmpty = 1; }else if( iLevel==FTS3_SEGCURSOR_PENDING ){ - iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL; - rc = fts3AllocateSegdirIdx(p, iIndex, 0, &iIdx); + iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0); + rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx); }else{ /* This call is to merge all segments at level iLevel. find the next ** available segment index at level iLevel+1. The call to ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to ** a single iLevel+2 segment if necessary. 
*/ - rc = fts3AllocateSegdirIdx(p, iIndex, iLevel+1, &iIdx); - iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL + iLevel+1; + rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); + iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1); } if( rc!=SQLITE_OK ) goto finished; assert( csr.nSegment>0 ); - assert( iNewLevel>=(iIndex*FTS3_SEGDIR_MAXLEVEL) ); - assert( iNewLevel<((iIndex+1)*FTS3_SEGDIR_MAXLEVEL) ); + assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) ); + assert( iNewLevelnIndex; i++){ - rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_PENDING); + rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING); if( rc==SQLITE_DONE ) rc = SQLITE_OK; } sqlite3Fts3PendingTermsClear(p); @@ -2954,12 +3014,16 @@ static void fts3UpdateDocTotals( sqlite3_free(a); } +/* +** Merge the entire database so that there is one segment for each +** iIndex/iLangid combination. +*/ static int fts3DoOptimize(Fts3Table *p, int bReturnDone){ int i; int bSeenDone = 0; int rc = SQLITE_OK; for(i=0; rc==SQLITE_OK && inIndex; i++){ - rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_ALL); + rc = fts3SegmentMerge(p, 0, i, FTS3_SEGCURSOR_ALL); if( rc==SQLITE_DONE ){ bSeenDone = 1; rc = SQLITE_OK; @@ -3015,7 +3079,9 @@ static int fts3DoRebuild(Fts3Table *p){ while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ int iCol; - rc = fts3PendingTermsDocid(p, sqlite3_column_int64(pStmt, 0)); + rc = fts3PendingTermsDocid(p, + langidFromSelect(p, pStmt), sqlite3_column_int64(pStmt, 0) + ); aSz[p->nColumn] = 0; for(iCol=0; rc==SQLITE_OK && iColnColumn; iCol++){ const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); @@ -3245,8 +3311,6 @@ static int fts3DeleteByRowid( rc = fts3DeleteAll(p, 1); *pnDoc = *pnDoc - 1; }else{ - sqlite3_int64 iRemove = sqlite3_value_int64(pRowid); - rc = fts3PendingTermsDocid(p, iRemove); fts3DeleteTerms(&rc, p, pRowid, aSzDel); if( p->zContentTbl==0 ){ fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid); @@ -3265,7 +3329,16 @@ static int 
fts3DeleteByRowid( /* ** This function does the work for the xUpdate method of FTS3 virtual -** tables. +** tables. The schema of the virtual table being: +** +** CREATE TABLE <table name>( +** <user columns>, +** <table name>
HIDDEN, +** docid HIDDEN, +** <langid> HIDDEN +** ); +** +** */ int sqlite3Fts3UpdateMethod( sqlite3_vtab *pVtab, /* FTS3 vtab object */ @@ -3282,6 +3355,10 @@ int sqlite3Fts3UpdateMethod( int bInsertDone = 0; assert( p->pSegments==0 ); + assert( + nArg==1 /* DELETE operations */ + || nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */ + ); /* Check for a "special" INSERT operation. One of the form: ** @@ -3295,6 +3372,11 @@ int sqlite3Fts3UpdateMethod( goto update_out; } + if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){ + rc = SQLITE_CONSTRAINT; + goto update_out; + } + /* Allocate space to hold the change in document sizes */ aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 ); if( aSzIns==0 ){ @@ -3369,7 +3451,10 @@ int sqlite3Fts3UpdateMethod( } } if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){ - rc = fts3PendingTermsDocid(p, *pRowid); + rc = fts3PendingTermsDocid(p, + sqlite3_value_int(apVal[2 + p->nColumn + 2]), + *pRowid + ); } if( rc==SQLITE_OK ){ assert( p->iPrevDocid==*pRowid ); diff --git a/manifest b/manifest index ffe0e4775e..c4b93f56a0 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sspurious\serrors\sthat\smay\soccur\sif\san\sempty\sdatabase\sis\sopened\sand\sthen\sinitialized\sas\sa\sWAL\sdatabase\sby\sa\ssecond\sconnection. -D 2012-02-28T17:57:34.628 +C Add\sthe\s"languageid="\soption\sto\sfts4.\sThis\scode\sis\sstill\slargely\suntested\sand\salsmost\scertainly\sbuggy.
+D 2012-03-01T19:44:20.362 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 3f79a373e57c3b92dabf76f40b065e719d31ac34 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -63,22 +63,22 @@ F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 4cf7b8e5bbb6667f5d7818fa0bf064fbbb72b086 +F ext/fts3/fts3.c 93a8eb6e6eb4cd0aa4856d841a9d8d0025a2784a F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h ce958a6fa92a95462853aa3acc0b69bcda39102f -F ext/fts3/fts3_aux.c 0ebfa7b86cf8ff6a0861605fcc63b83ec1b70691 +F ext/fts3/fts3Int.h 8ba2d8ce5db6da67c5e5e7b8a0b90e6d80826546 +F ext/fts3/fts3_aux.c 72de4cb43db7bfc2f68fbda04b7d8095ae9a6239 F ext/fts3/fts3_expr.c f5df26bddf46a5916b2a5f80c4027996e92b7b15 F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914 F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec F ext/fts3/fts3_icu.c 6c8f395cdf9e1e3afa7fadb7e523dbbf381c6dfa F ext/fts3/fts3_porter.c b7e5276f9f0a5fc7018b6fa55ce0f31f269ef881 F ext/fts3/fts3_snippet.c 1f9ee6a8e0e242649645968dcec4deb253d86c2a -F ext/fts3/fts3_term.c a5457992723455a58804cb75c8cbd8978db5c2ef +F ext/fts3/fts3_term.c d3466cf99432291be08e379d89645462431809d6 F ext/fts3/fts3_test.c 24fa13f330db011500acb95590da9eee24951894 F ext/fts3/fts3_tokenizer.c 9ff7ec66ae3c5c0340fa081958e64f395c71a106 F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 0dde8f307b8045565cf63797ba9acfaff1c50c68 -F ext/fts3/fts3_write.c 1721187a4dec29ef9ae648ad8478da741085af18 +F ext/fts3/fts3_write.c 489d262b1ee9ab1dbb4da48bd8737fac15d0f58f F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F 
ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 @@ -496,6 +496,7 @@ F test/fts3snippet.test 8e956051221a34c7daeb504f023cb54d5fa5a8b2 F test/fts3sort.test 95be0b19d7e41c44b29014f13ea8bddd495fd659 F test/fts4aa.test 6e7f90420b837b2c685f3bcbe84c868492d40a68 F test/fts4content.test 17b2360f7d1a9a7e5aa8022783f5c5731b6dfd4f +F test/fts4langid.test 3d968b7c0afb8be1609794267f34b89d378a81ea F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f F test/func3.test 001021e5b88bd02a3b365a5c5fd8f6f49d39744a @@ -991,7 +992,10 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P c267893a0813beb1764071409025e178318e1ca3 -R 751c4f9505bcae82ab2d498f42151225 +P 16330a2f7262173a32ae48a72c0ee2522b6dc554 +R 86036df8ba11902f17395620671e5794 +T *branch * fts4-languageid +T *sym-fts4-languageid * +T -sym-trunk * U dan -Z aacac9f6818b59f3fbe792ef77401913 +Z 6902c01b6e8a000d5e06f8fe8778490f diff --git a/manifest.uuid b/manifest.uuid index df6fdce3ee..d2201717b8 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -16330a2f7262173a32ae48a72c0ee2522b6dc554 \ No newline at end of file +bea257f70f10dd1111d79cabd1e1462dc651704d \ No newline at end of file diff --git a/test/fts4langid.test b/test/fts4langid.test new file mode 100644 index 0000000000..a65ccedac3 --- /dev/null +++ b/test/fts4langid.test @@ -0,0 +1,78 @@ +# 2012 March 01 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the languageid=xxx FTS4 option. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set ::testprefix fts4content + +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. +ifcapable !fts3 { + finish_test + return +} + +set ::testprefix fts4langid + + + +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id); +} + +do_execsql_test 1.2 { + SELECT sql FROM sqlite_master WHERE name = 't1_content'; +} {{CREATE TABLE 't1_content'(docid INTEGER PRIMARY KEY, 'c0a', 'c1b', langid)}} + +do_execsql_test 1.3 {SELECT docid FROM t1} {} +do_execsql_test 1.4 {SELECT lang_id FROM t1} {} + +do_execsql_test 1.5 {INSERT INTO t1(a, b) VALUES('aaa', 'bbb')} +do_execsql_test 1.6 {SELECT lang_id FROM t1 } {0} + +do_execsql_test 1.7 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 4)} +do_execsql_test 1.8 {SELECT lang_id FROM t1 } {0 4} + +do_execsql_test 1.9 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 'xyz')} +do_execsql_test 1.10 {SELECT lang_id FROM t1} {0 4 0} + +do_execsql_test 1.11 { + CREATE VIRTUAL TABLE t2 USING fts4; + INSERT INTO t2 VALUES('abc'); +} +do_execsql_test 1.12 { SELECT rowid FROM t2 WHERE content MATCH 'abc' } 1 + +do_execsql_test 1.13 { + DROP TABLE t1; + CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); + INSERT INTO t1(content) VALUES('a b c'); + INSERT INTO t1(content, lang_id) VALUES('a b c', 1); +} + +do_execsql_test 1.14 { + SELECT rowid FROM t1 WHERE t1 MATCH 'b'; +} {1} +do_execsql_test 1.15 { + SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 0; +} {1} + +do_execsql_test 1.16 { + SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 1; +} {2} + +do_catchsql_test 1.17 { + INSERT INTO t1(content, lang_id) VALUES('123', -1); +} {1 {constraint failed}} + +finish_test + From e3ddd1ac42834a0189fa359a32755c2e9b733224 Mon
Sep 17 00:00:00 2001 From: dan Date: Fri, 2 Mar 2012 11:48:50 +0000 Subject: [PATCH 2/6] Fix a bug in merging FTS language tables for languages other than language 0. FossilOrigin-Name: d281cb8984c911a4c0cce2ec299e1351d8e580e4 --- ext/fts3/fts3_write.c | 2 +- manifest | 17 ++-- manifest.uuid | 2 +- test/fts4langid.test | 181 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 190 insertions(+), 12 deletions(-) diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index ef2436531c..b158cc4510 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -488,7 +488,7 @@ int sqlite3Fts3AllSegdirs( /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */ rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); if( rc==SQLITE_OK ){ - sqlite3_bind_int(pStmt, 1, iLevel+iIndex*FTS3_SEGDIR_MAXLEVEL); + sqlite3_bind_int(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)); } } *ppStmt = pStmt; diff --git a/manifest b/manifest index c4b93f56a0..e051362a62 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\s"languageid="\soption\sto\sfts4.\sThis\scode\sis\sstill\slargely\suntested\sand\salsmost\scertainly\sbuggy. -D 2012-03-01T19:44:20.362 +C Fix\sa\sbug\sin\smerging\sFTS\slanguage\stables\sfor\slanguages\sother\sthan\slanguage\s0. 
+D 2012-03-02T11:48:50.564 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 3f79a373e57c3b92dabf76f40b065e719d31ac34 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -78,7 +78,7 @@ F ext/fts3/fts3_test.c 24fa13f330db011500acb95590da9eee24951894 F ext/fts3/fts3_tokenizer.c 9ff7ec66ae3c5c0340fa081958e64f395c71a106 F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 0dde8f307b8045565cf63797ba9acfaff1c50c68 -F ext/fts3/fts3_write.c 489d262b1ee9ab1dbb4da48bd8737fac15d0f58f +F ext/fts3/fts3_write.c 36fc2e3a28f51ee135a344877c1e4be0a9f45e6e F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 @@ -496,7 +496,7 @@ F test/fts3snippet.test 8e956051221a34c7daeb504f023cb54d5fa5a8b2 F test/fts3sort.test 95be0b19d7e41c44b29014f13ea8bddd495fd659 F test/fts4aa.test 6e7f90420b837b2c685f3bcbe84c868492d40a68 F test/fts4content.test 17b2360f7d1a9a7e5aa8022783f5c5731b6dfd4f -F test/fts4langid.test 3d968b7c0afb8be1609794267f34b89d378a81ea +F test/fts4langid.test 7ab7be619d3acb3727e4bef3230ba3dbcf2e0556 F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f F test/func3.test 001021e5b88bd02a3b365a5c5fd8f6f49d39744a @@ -992,10 +992,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P 16330a2f7262173a32ae48a72c0ee2522b6dc554 -R 86036df8ba11902f17395620671e5794 -T *branch * fts4-languageid -T *sym-fts4-languageid * -T -sym-trunk * +P bea257f70f10dd1111d79cabd1e1462dc651704d +R a3a9247d2c76c9d90f9fc486f3311f0d U dan -Z 6902c01b6e8a000d5e06f8fe8778490f +Z f1e998b56e58f712fe6da1411961b8ef diff 
--git a/manifest.uuid b/manifest.uuid index d2201717b8..ce8a1c3b82 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -bea257f70f10dd1111d79cabd1e1462dc651704d \ No newline at end of file +d281cb8984c911a4c0cce2ec299e1351d8e580e4 \ No newline at end of file diff --git a/test/fts4langid.test b/test/fts4langid.test index a65ccedac3..51c42ddb54 100644 --- a/test/fts4langid.test +++ b/test/fts4langid.test @@ -24,6 +24,27 @@ ifcapable !fts3 { set ::testprefix fts4langid +#--------------------------------------------------------------------------- +# Test plan: +# +# 1.* - Warm-body tests created for specific purposes during development. +# Passing these doesn't really prove much. +# +# 2.* - Test that FTS queries only ever return rows associated with +# the requested language. +# +# 3.* - Test that the 'optimize' and 'rebuild' commands work correctly. +# +# 4.* - Test that if one is provided, the tokenizer xLanguage method +# is called to configure the tokenizer before tokenizing query +# or document text. +# +# 5.* - Test the fts4aux table when the associated FTS4 table contains +# multiple languages. +# +# 6.* - Tests with content= tables. Both where there is a real +# underlying content table and where there is not. 
+# do_execsql_test 1.1 { @@ -74,5 +95,165 @@ do_catchsql_test 1.17 { INSERT INTO t1(content, lang_id) VALUES('123', -1); } {1 {constraint failed}} +do_execsql_test 1.18 { + DROP TABLE t1; + CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); + INSERT INTO t1(content, lang_id) VALUES('A', 13); + INSERT INTO t1(content, lang_id) VALUES('B', 13); + INSERT INTO t1(content, lang_id) VALUES('C', 13); + INSERT INTO t1(content, lang_id) VALUES('D', 13); + INSERT INTO t1(content, lang_id) VALUES('E', 13); + INSERT INTO t1(content, lang_id) VALUES('F', 13); + INSERT INTO t1(content, lang_id) VALUES('G', 13); + INSERT INTO t1(content, lang_id) VALUES('H', 13); + INSERT INTO t1(content, lang_id) VALUES('I', 13); + INSERT INTO t1(content, lang_id) VALUES('J', 13); + INSERT INTO t1(content, lang_id) VALUES('K', 13); + INSERT INTO t1(content, lang_id) VALUES('L', 13); + INSERT INTO t1(content, lang_id) VALUES('M', 13); + INSERT INTO t1(content, lang_id) VALUES('N', 13); + INSERT INTO t1(content, lang_id) VALUES('O', 13); + INSERT INTO t1(content, lang_id) VALUES('P', 13); + INSERT INTO t1(content, lang_id) VALUES('Q', 13); + INSERT INTO t1(content, lang_id) VALUES('R', 13); + INSERT INTO t1(content, lang_id) VALUES('S', 13); + SELECT rowid FROM t1 WHERE t1 MATCH 'A'; +} {} + + +#------------------------------------------------------------------------- +# Test cases 2.* +# + +proc build_multilingual_db_1 {db} { + $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) } + + set xwords [list zero one two three four five six seven eight nine ten] + set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa] + + for {set i 0} {$i < 1000} {incr i} { + set iLangid [expr $i%9] + set x "" + set y "" + + set x [list] + lappend x [lindex $xwords [expr ($i / 1000) % 10]] + lappend x [lindex $xwords [expr ($i / 100) % 10]] + lappend x [lindex $xwords [expr ($i / 10) % 10]] + lappend x [lindex $xwords [expr ($i / 1) % 10]] + + set y [list] + lappend y [lindex 
$ywords [expr ($i / 1000) % 10]] + lappend y [lindex $ywords [expr ($i / 100) % 10]] + lappend y [lindex $ywords [expr ($i / 10) % 10]] + lappend y [lindex $ywords [expr ($i / 1) % 10]] + + $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) } + } +} + +proc rowid_list_set_langid {langid} { + set ::rowid_list_langid $langid +} +proc rowid_list {pattern} { + set langid $::rowid_list_langid + set res [list] + db eval {SELECT docid, x, y FROM t2 WHERE l = $langid ORDER BY docid ASC} { + if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} { + lappend res $docid + } + } + return $res +} + +proc or_merge_list {list1 list2} { + set res [list] + + set i1 0 + set i2 0 + + set n1 [llength $list1] + set n2 [llength $list2] + + while {$i1 < $n1 && $i2 < $n2} { + set e1 [lindex $list1 $i1] + set e2 [lindex $list2 $i2] + + if {$e1==$e2} { + lappend res $e1 + incr i1 + incr i2 + } elseif {$e1 < $e2} { + lappend res $e1 + incr i1 + } else { + lappend res $e2 + incr i2 + } + } + + concat $res [lrange $list1 $i1 end] [lrange $list2 $i2 end] +} + +proc or_merge_lists {args} { + set res [lindex $args 0] + for {set i 1} {$i < [llength $args]} {incr i} { + set res [or_merge_list $res [lindex $args $i]] + } + set res +} + +proc and_merge_list {list1 list2} { + foreach i $list2 { set a($i) 1 } + set res [list] + foreach i $list1 { + if {[info exists a($i)]} {lappend res $i} + } + set res +} + + +proc and_merge_lists {args} { + set res [lindex $args 0] + for {set i 1} {$i < [llength $args]} {incr i} { + set res [and_merge_list $res [lindex $args $i]] + } + set res +} + +proc filter_list {list langid} { + set res [list] + foreach i $list { + if {($i % 9) == $langid} {lappend res $i} + } + set res +} + +do_test 2.0 { + reset_db + build_multilingual_db_1 db +} {} + +proc do_test_2.1 {tn query res_script} { + for {set langid 0} {$langid < 10} {incr langid} { + rowid_list_set_langid $langid + set res [eval $res_script] + + set actual [ + execsql {SELECT 
docid FROM t2 WHERE t2 MATCH $query AND l = $langid} + ] + do_test 2.1.$tn.$langid [list set {} $actual] $res + } +} + +do_test_2.1 1 {delta} { rowid_list delta } +do_test_2.1 2 {"zero one two"} { rowid_list "zero one two" } +do_test_2.1 3 {zero one two} { + and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] +} +do_test_2.1 4 {"zero one" OR "one two"} { + or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] +} + finish_test From 8fb662b0beb67f2400f1747283f9bfc7cbc0d0a9 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 2 Mar 2012 12:26:19 +0000 Subject: [PATCH 3/6] Fix the FTS 'optimize' command on multi-lingual databases. FossilOrigin-Name: 65fa693729a336e4d905ce72e6b9ccf4faa772bd --- ext/fts3/fts3Int.h | 2 +- ext/fts3/fts3_write.c | 45 ++++++++++++++++++++++++++++++++----------- manifest | 16 +++++++-------- manifest.uuid | 2 +- test/fts4langid.test | 28 +++++++++++++++++++++------ 5 files changed, 66 insertions(+), 27 deletions(-) diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index 8e889181ca..078b5b987b 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -197,7 +197,7 @@ struct Fts3Table { /* Precompiled statements used by the implementation. Each of these ** statements is run and reset within a single virtual table API call. */ - sqlite3_stmt *aStmt[27]; + sqlite3_stmt *aStmt[28]; char *zReadExprlist; char *zWriteExprlist; diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index b158cc4510..decbe0b73a 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -232,6 +232,8 @@ struct SegmentNode { #define SQL_DELETE_SEGDIR_RANGE 26 +#define SQL_SELECT_ALL_LANGID 27 + /* ** This function is used to obtain an SQLite prepared statement handle ** for the statement identified by the second argument. If successful, @@ -285,6 +287,7 @@ static int fts3SqlStmt( /* 25 */ "", /* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", +/* 27 */ "SELECT DISTINCT level / (1024 * ?) 
FROM %Q.'%q_segdir'", }; int rc = SQLITE_OK; @@ -2229,7 +2232,12 @@ static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){ ** ** Return SQLITE_OK if successful, or an SQLite error code if not. */ -static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){ +static int fts3SegmentMaxLevel( + Fts3Table *p, + int iLangid, + int iIndex, + int *pnMax +){ sqlite3_stmt *pStmt; int rc; assert( iIndex>=0 && iIndex<p->nIndex ); @@ -2242,8 +2250,10 @@ static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){ */ rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); if( rc!=SQLITE_OK ) return rc; - sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL); - sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL - 1); + sqlite3_bind_int(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); + sqlite3_bind_int(pStmt, 2, + getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) + ); if( SQLITE_ROW==sqlite3_step(pStmt) ){ *pnMax = sqlite3_column_int(pStmt, 0); } @@ -2804,7 +2814,7 @@ static int fts3SegmentMerge( rc = SQLITE_DONE; goto finished; } - rc = fts3SegmentMaxLevel(p, iIndex, &iNewLevel); + rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel); bIgnoreEmpty = 1; }else if( iLevel==FTS3_SEGCURSOR_PENDING ){ @@ -3019,16 +3029,29 @@ static void fts3UpdateDocTotals( ** iIndex/iLangid combination.
*/ static int fts3DoOptimize(Fts3Table *p, int bReturnDone){ - int i; int bSeenDone = 0; - int rc = SQLITE_OK; - for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){ - rc = fts3SegmentMerge(p, 0, i, FTS3_SEGCURSOR_ALL); - if( rc==SQLITE_DONE ){ - bSeenDone = 1; - rc = SQLITE_OK; + int rc; + sqlite3_stmt *pAllLangid = 0; + + rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); + if( rc==SQLITE_OK ){ + int rc2; + sqlite3_bind_int(pAllLangid, 1, p->nIndex); + while( sqlite3_step(pAllLangid)==SQLITE_ROW ){ + int i; + int iLangid = sqlite3_column_int(pAllLangid, 0); + for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){ + rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL); + if( rc==SQLITE_DONE ){ + bSeenDone = 1; + rc = SQLITE_OK; + } + } } + rc2 = sqlite3_reset(pAllLangid); + if( rc==SQLITE_OK ) rc = rc2; } + sqlite3Fts3SegmentsClose(p); sqlite3Fts3PendingTermsClear(p); diff --git a/manifest b/manifest index e051362a62..60424716c2 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sbug\sin\smerging\sFTS\slanguage\stables\sfor\slanguages\sother\sthan\slanguage\s0. -D 2012-03-02T11:48:50.564 +C Fix\sthe\sFTS\s'optimize'\scommand\son\smulti-lingual\sdatabases.
+D 2012-03-02T12:26:19.396 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 3f79a373e57c3b92dabf76f40b065e719d31ac34 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -65,7 +65,7 @@ F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d F ext/fts3/fts3.c 93a8eb6e6eb4cd0aa4856d841a9d8d0025a2784a F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h 8ba2d8ce5db6da67c5e5e7b8a0b90e6d80826546 +F ext/fts3/fts3Int.h 521d300f2af4e741f53c4e2dd540275fb64533eb F ext/fts3/fts3_aux.c 72de4cb43db7bfc2f68fbda04b7d8095ae9a6239 F ext/fts3/fts3_expr.c f5df26bddf46a5916b2a5f80c4027996e92b7b15 F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914 @@ -78,7 +78,7 @@ F ext/fts3/fts3_test.c 24fa13f330db011500acb95590da9eee24951894 F ext/fts3/fts3_tokenizer.c 9ff7ec66ae3c5c0340fa081958e64f395c71a106 F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 0dde8f307b8045565cf63797ba9acfaff1c50c68 -F ext/fts3/fts3_write.c 36fc2e3a28f51ee135a344877c1e4be0a9f45e6e +F ext/fts3/fts3_write.c 35b98a42f9bbdd28af1b1f3bb0c09ff07090a764 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 @@ -496,7 +496,7 @@ F test/fts3snippet.test 8e956051221a34c7daeb504f023cb54d5fa5a8b2 F test/fts3sort.test 95be0b19d7e41c44b29014f13ea8bddd495fd659 F test/fts4aa.test 6e7f90420b837b2c685f3bcbe84c868492d40a68 F test/fts4content.test 17b2360f7d1a9a7e5aa8022783f5c5731b6dfd4f -F test/fts4langid.test 7ab7be619d3acb3727e4bef3230ba3dbcf2e0556 +F test/fts4langid.test a793f2da4cbe9c8ad2f49d2a013c6a0ff61e1783 F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f F test/func3.test 
001021e5b88bd02a3b365a5c5fd8f6f49d39744a @@ -992,7 +992,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P bea257f70f10dd1111d79cabd1e1462dc651704d -R a3a9247d2c76c9d90f9fc486f3311f0d +P d281cb8984c911a4c0cce2ec299e1351d8e580e4 +R 2e9c73a6dc49c268fdce6f613b343e49 U dan -Z f1e998b56e58f712fe6da1411961b8ef +Z bf6a4c44b9753bccc8cfbcc7423214ad diff --git a/manifest.uuid b/manifest.uuid index ce8a1c3b82..9089612df2 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d281cb8984c911a4c0cce2ec299e1351d8e580e4 \ No newline at end of file +65fa693729a336e4d905ce72e6b9ccf4faa772bd \ No newline at end of file diff --git a/test/fts4langid.test b/test/fts4langid.test index 51c42ddb54..310332d650 100644 --- a/test/fts4langid.test +++ b/test/fts4langid.test @@ -234,7 +234,7 @@ do_test 2.0 { build_multilingual_db_1 db } {} -proc do_test_2.1 {tn query res_script} { +proc do_test_2 {tn query res_script} { for {set langid 0} {$langid < 10} {incr langid} { rowid_list_set_langid $langid set res [eval $res_script] @@ -242,16 +242,32 @@ proc do_test_2.1 {tn query res_script} { set actual [ execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid} ] - do_test 2.1.$tn.$langid [list set {} $actual] $res + do_test 2.$tn.$langid [list set {} $actual] $res } } -do_test_2.1 1 {delta} { rowid_list delta } -do_test_2.1 2 {"zero one two"} { rowid_list "zero one two" } -do_test_2.1 3 {zero one two} { +# Run some queries. 
+do_test_2 1.1 {delta} { rowid_list delta } +do_test_2 1.2 {"zero one two"} { rowid_list "zero one two" } +do_test_2 1.3 {zero one two} { and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] } -do_test_2.1 4 {"zero one" OR "one two"} { +do_test_2 1.4 {"zero one" OR "one two"} { + or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] +} + +# Now try the same tests as above, but after running the 'optimize' +# command on the FTS table. +do_execsql_test 2.2 { + INSERT INTO t2(t2) VALUES('optimize'); + SELECT count(*) FROM t2_segdir; +} {9} +do_test_2 2.1 {delta} { rowid_list delta } +do_test_2 2.2 {"zero one two"} { rowid_list "zero one two" } +do_test_2 2.3 {zero one two} { + and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] +} +do_test_2 2.4 {"zero one" OR "one two"} { or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] } From c0db214b107baa92e4985551c8431eb15b3d3261 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 2 Mar 2012 16:18:21 +0000 Subject: [PATCH 4/6] Add test for FTS 'rebuild' command. FossilOrigin-Name: 181bc35731f19c3e4497ba3338c209918d34ea69 --- manifest | 12 ++++++------ manifest.uuid | 2 +- test/fts4langid.test | 40 +++++++++++++++++++++++++++++----------- 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/manifest b/manifest index 60424716c2..268d6f64f6 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sthe\sFTS\s'optimize'\scommand\son\smulti-lingual\sdatabases. -D 2012-03-02T12:26:19.396 +C Add\stest\sfor\sFTS\s'rebuild'\scommand. 
+D 2012-03-02T16:18:21.845 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 3f79a373e57c3b92dabf76f40b065e719d31ac34 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -496,7 +496,7 @@ F test/fts3snippet.test 8e956051221a34c7daeb504f023cb54d5fa5a8b2 F test/fts3sort.test 95be0b19d7e41c44b29014f13ea8bddd495fd659 F test/fts4aa.test 6e7f90420b837b2c685f3bcbe84c868492d40a68 F test/fts4content.test 17b2360f7d1a9a7e5aa8022783f5c5731b6dfd4f -F test/fts4langid.test a793f2da4cbe9c8ad2f49d2a013c6a0ff61e1783 +F test/fts4langid.test 343a65d54419b45a318a1acdb1c37749f26fdc79 F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f F test/func3.test 001021e5b88bd02a3b365a5c5fd8f6f49d39744a @@ -992,7 +992,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P d281cb8984c911a4c0cce2ec299e1351d8e580e4 -R 2e9c73a6dc49c268fdce6f613b343e49 +P 65fa693729a336e4d905ce72e6b9ccf4faa772bd +R 4e2c13b79ff592d73e478288d906b721 U dan -Z bf6a4c44b9753bccc8cfbcc7423214ad +Z bcf6ed8432a11d3fdd53825eba08d4d7 diff --git a/manifest.uuid b/manifest.uuid index 9089612df2..bdb54dd27f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -65fa693729a336e4d905ce72e6b9ccf4faa772bd \ No newline at end of file +181bc35731f19c3e4497ba3338c209918d34ea69 \ No newline at end of file diff --git a/test/fts4langid.test b/test/fts4langid.test index 310332d650..059a56d08f 100644 --- a/test/fts4langid.test +++ b/test/fts4langid.test @@ -30,19 +30,21 @@ set ::testprefix fts4langid # 1.* - Warm-body tests created for specific purposes during development. # Passing these doesn't really prove much. # -# 2.* - Test that FTS queries only ever return rows associated with -# the requested language. 
+# 2.1.* - Test that FTS queries only ever return rows associated with +# the requested language. # -# 3.* - Test that the 'optimize' and 'rebuild' commands work correctly. +# 2.2.* - Same as 2.1.*, after an 'optimize' command. # -# 4.* - Test that if one is provided, the tokenizer xLanguage method +# 2.3.* - Same as 2.1.*, after a 'rebuild' command. +# +# 3.* - Test that if one is provided, the tokenizer xLanguage method # is called to configure the tokenizer before tokenizing query # or document text. # -# 5.* - Test the fts4aux table when the associated FTS4 table contains +# 4.* - Test the fts4aux table when the associated FTS4 table contains # multiple languages. # -# 6.* - Tests with content= tables. Both where there is a real +# 5.* - Tests with content= tables. Both where there is a real # underlying content table and where there is not. # @@ -124,7 +126,6 @@ do_execsql_test 1.18 { #------------------------------------------------------------------------- # Test cases 2.* # - proc build_multilingual_db_1 {db} { $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) } @@ -258,16 +259,33 @@ do_test_2 1.4 {"zero one" OR "one two"} { # Now try the same tests as above, but after running the 'optimize' # command on the FTS table. +# do_execsql_test 2.2 { INSERT INTO t2(t2) VALUES('optimize'); SELECT count(*) FROM t2_segdir; } {9} -do_test_2 2.1 {delta} { rowid_list delta } -do_test_2 2.2 {"zero one two"} { rowid_list "zero one two" } -do_test_2 2.3 {zero one two} { +do_test_2 2.1 {delta} { rowid_list delta } +do_test_2 2.2 {"zero one two"} { rowid_list "zero one two" } +do_test_2 2.3 {zero one two} { and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] } -do_test_2 2.4 {"zero one" OR "one two"} { +do_test_2 2.4 {"zero one" OR "one two"} { + or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] +} + +# And rebuild. 
+# +do_test 2.3 { + reset_db + build_multilingual_db_1 db + execsql { INSERT INTO t2(t2) VALUES('rebuild') } +} {} +do_test_2 3.1 {delta} { rowid_list delta } +do_test_2 3.2 {"zero one two"} { rowid_list "zero one two" } +do_test_2 3.3 {zero one two} { + and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] +} +do_test_2 3.4 {"zero one" OR "one two"} { or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] } From 7395599031f856ac889a575a6192bb0e756af04e Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 2 Mar 2012 19:53:02 +0000 Subject: [PATCH 5/6] Fix problems with combining content= and languageid= in a single fts4 table. FossilOrigin-Name: 22491e7bc38aee43819b888e04241cb6a6ef73a3 --- ext/fts3/fts3.c | 23 +++++++++- manifest | 14 +++---- manifest.uuid | 2 +- test/fts4langid.test | 99 ++++++++++++++++++++++++++++++++++---------- 4 files changed, 106 insertions(+), 32 deletions(-) diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 6732edaece..dec7f8722c 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -800,13 +800,18 @@ static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){ for(i=0; i<p->nColumn; i++){ fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]); } - if( p->zLanguageid ) fts3Appendf(pRc, &zRet, ",langid"); + if( p->zLanguageid ){ + fts3Appendf(pRc, &zRet, ", x.%Q", "langid"); + } sqlite3_free(zFree); }else{ fts3Appendf(pRc, &zRet, "rowid"); for(i=0; i<p->nColumn; i++){ fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]); } + if( p->zLanguageid ){ + fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid); + } } fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", p->zDb, @@ -1215,8 +1220,20 @@ static int fts3InitVtab( sqlite3_free((void*)aCol); aCol = 0; rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString); + + /* If a languageid= option was specified, remove the language id + ** column from the aCol[] array.
*/ + if( rc==SQLITE_OK && zLanguageid ){ + int j; + for(j=0; j0 ); } if( rc!=SQLITE_OK ) goto fts3_init_out; @@ -3045,6 +3062,8 @@ static int fts3ColumnMethod( /* The extra column whose name is the same as the table. ** Return a blob which is a pointer to the cursor. */ sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); + }else if( iCol==p->nColumn+2 && pCsr->pExpr ){ + sqlite3_result_int64(pCtx, pCsr->iLangid); }else{ /* The requested column is either a user column (one that contains ** indexed data), or the language-id column. */ diff --git a/manifest b/manifest index 268d6f64f6..15fcd20c9b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\stest\sfor\sFTS\s'rebuild'\scommand. -D 2012-03-02T16:18:21.845 +C Fix\sproblems\swith\scombining\scontent=\sand\slanguageid=\sin\sa\ssingle\sfts4\stable. +D 2012-03-02T19:53:02.350 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 3f79a373e57c3b92dabf76f40b065e719d31ac34 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -63,7 +63,7 @@ F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 93a8eb6e6eb4cd0aa4856d841a9d8d0025a2784a +F ext/fts3/fts3.c fd89caa4169520c32cf46ca5a62df6dd48201422 F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe F ext/fts3/fts3Int.h 521d300f2af4e741f53c4e2dd540275fb64533eb F ext/fts3/fts3_aux.c 72de4cb43db7bfc2f68fbda04b7d8095ae9a6239 @@ -496,7 +496,7 @@ F test/fts3snippet.test 8e956051221a34c7daeb504f023cb54d5fa5a8b2 F test/fts3sort.test 95be0b19d7e41c44b29014f13ea8bddd495fd659 F test/fts4aa.test 6e7f90420b837b2c685f3bcbe84c868492d40a68 F test/fts4content.test 17b2360f7d1a9a7e5aa8022783f5c5731b6dfd4f -F test/fts4langid.test 343a65d54419b45a318a1acdb1c37749f26fdc79 +F test/fts4langid.test 
be989b5cddcd7596b87232af193f6c4560a34272 F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f F test/func3.test 001021e5b88bd02a3b365a5c5fd8f6f49d39744a @@ -992,7 +992,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P 65fa693729a336e4d905ce72e6b9ccf4faa772bd -R 4e2c13b79ff592d73e478288d906b721 +P 181bc35731f19c3e4497ba3338c209918d34ea69 +R acfb20f690a18ac8d67e116ae8c76f7d U dan -Z bcf6ed8432a11d3fdd53825eba08d4d7 +Z b771fbbc3f5dd49e39970a5f917fc8b7 diff --git a/manifest.uuid b/manifest.uuid index bdb54dd27f..def5beae28 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -181bc35731f19c3e4497ba3338c209918d34ea69 \ No newline at end of file +22491e7bc38aee43819b888e04241cb6a6ef73a3 \ No newline at end of file diff --git a/test/fts4langid.test b/test/fts4langid.test index 059a56d08f..b9110cb768 100644 --- a/test/fts4langid.test +++ b/test/fts4langid.test @@ -37,17 +37,17 @@ set ::testprefix fts4langid # # 2.3.* - Same as 2.1.*, after a 'rebuild' command. # -# 3.* - Test that if one is provided, the tokenizer xLanguage method +# 3.* - Tests with content= tables. Both where there is a real +# underlying content table and where there is not. +# +# +# 4.* - Test that if one is provided, the tokenizer xLanguage method # is called to configure the tokenizer before tokenizing query # or document text. # -# 4.* - Test the fts4aux table when the associated FTS4 table contains +# 5.* - Test the fts4aux table when the associated FTS4 table contains # multiple languages. # -# 5.* - Tests with content= tables. Both where there is a real -# underlying content table and where there is not. 
-# - do_execsql_test 1.1 { CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id); @@ -151,6 +151,11 @@ proc build_multilingual_db_1 {db} { $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) } } + + $db eval { + CREATE TABLE data(x, y, l); + INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2; + } } proc rowid_list_set_langid {langid} { @@ -159,9 +164,9 @@ proc rowid_list_set_langid {langid} { proc rowid_list {pattern} { set langid $::rowid_list_langid set res [list] - db eval {SELECT docid, x, y FROM t2 WHERE l = $langid ORDER BY docid ASC} { + db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} { if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} { - lappend res $docid + lappend res $rowid } } return $res @@ -235,7 +240,7 @@ do_test 2.0 { build_multilingual_db_1 db } {} -proc do_test_2 {tn query res_script} { +proc do_test_query1 {tn query res_script} { for {set langid 0} {$langid < 10} {incr langid} { rowid_list_set_langid $langid set res [eval $res_script] @@ -243,17 +248,17 @@ proc do_test_2 {tn query res_script} { set actual [ execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid} ] - do_test 2.$tn.$langid [list set {} $actual] $res + do_test $tn.$langid [list set {} $actual] $res } } # Run some queries. 
-do_test_2 1.1 {delta} { rowid_list delta } -do_test_2 1.2 {"zero one two"} { rowid_list "zero one two" } -do_test_2 1.3 {zero one two} { +do_test_query1 2.1.1 {delta} { rowid_list delta } +do_test_query1 2.1.2 {"zero one two"} { rowid_list "zero one two" } +do_test_query1 2.1.3 {zero one two} { and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] } -do_test_2 1.4 {"zero one" OR "one two"} { +do_test_query1 2.1.4 {"zero one" OR "one two"} { or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] } @@ -264,12 +269,12 @@ do_execsql_test 2.2 { INSERT INTO t2(t2) VALUES('optimize'); SELECT count(*) FROM t2_segdir; } {9} -do_test_2 2.1 {delta} { rowid_list delta } -do_test_2 2.2 {"zero one two"} { rowid_list "zero one two" } -do_test_2 2.3 {zero one two} { +do_test_query1 2.2.1 {delta} { rowid_list delta } +do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" } +do_test_query1 2.2.3 {zero one two} { and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] } -do_test_2 2.4 {"zero one" OR "one two"} { +do_test_query1 2.2.4 {"zero one" OR "one two"} { or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] } @@ -280,14 +285,64 @@ do_test 2.3 { build_multilingual_db_1 db execsql { INSERT INTO t2(t2) VALUES('rebuild') } } {} -do_test_2 3.1 {delta} { rowid_list delta } -do_test_2 3.2 {"zero one two"} { rowid_list "zero one two" } -do_test_2 3.3 {zero one two} { +do_test_query1 2.3.1 {delta} { rowid_list delta } +do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" } +do_test_query1 2.3.3 {zero one two} { and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] } -do_test_2 3.4 {"zero one" OR "one two"} { +do_test_query1 2.3.4 {"zero one" OR "one two"} { or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] } +#------------------------------------------------------------------------- +# Test cases 3.* +# +do_test 3.0 { + reset_db + build_multilingual_db_1 db + execsql { + CREATE TABLE t3_data(l, 
x, y); + INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2; + DROP TABLE t2; + } +} {} +do_execsql_test 3.1 { + CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l); + INSERT INTO t2(t2) VALUES('rebuild'); +} + +do_test_query1 3.1.1 {delta} { rowid_list delta } +do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" } +do_test_query1 3.1.3 {zero one two} { + and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] +} +do_test_query1 3.1.4 {"zero one" OR "one two"} { + or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] +} + +do_execsql_test 3.2.1 { + DROP TABLE t2; + CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable); +} + +do_execsql_test 3.2.2 { + INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data; +} + +do_execsql_test 3.2.3 { + DROP TABLE t3_data; +} + +do_test_query1 3.3.1 {delta} { rowid_list delta } +do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" } +do_test_query1 3.3.3 {zero one two} { + and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] +} +do_test_query1 3.3.4 {"zero one" OR "one two"} { + or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] +} + + + finish_test From 996073b3aec31f41354a25148bbc995c1391633c Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 3 Mar 2012 18:46:41 +0000 Subject: [PATCH 6/6] Add the xLanguageid method to sqlite3_fts3_tokenizer versions 1 and greater. 
FossilOrigin-Name: f8e9c445dd358c40e5a7bf3756b9f291909dbea7 --- ext/fts3/fts3.c | 10 +- ext/fts3/fts3Int.h | 6 +- ext/fts3/fts3_expr.c | 43 ++++++-- ext/fts3/fts3_snippet.c | 16 +-- ext/fts3/fts3_test.c | 199 ++++++++++++++++++++++++++++++++++++++ ext/fts3/fts3_tokenizer.c | 3 +- ext/fts3/fts3_tokenizer.h | 11 ++- ext/fts3/fts3_write.c | 39 ++++---- manifest | 30 +++--- manifest.uuid | 2 +- test/fts4langid.test | 41 +++++++- test/permutations.test | 3 +- 12 files changed, 340 insertions(+), 63 deletions(-) diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index dec7f8722c..f16191f31f 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -2958,8 +2958,11 @@ static int fts3FilterMethod( return SQLITE_NOMEM; } - rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->bHasStat, - p->nColumn, iCol, zQuery, -1, &pCsr->pExpr + pCsr->iLangid = 0; + if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]); + + rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, + p->azColumn, p->bHasStat, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr ); if( rc!=SQLITE_OK ){ if( rc==SQLITE_ERROR ){ @@ -2969,9 +2972,6 @@ static int fts3FilterMethod( return rc; } - pCsr->iLangid = 0; - if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]); - rc = sqlite3Fts3ReadLock(p); if( rc!=SQLITE_OK ) return rc; diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index 078b5b987b..393cd6aea1 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -498,7 +498,7 @@ void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *, void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *); /* fts3_expr.c */ -int sqlite3Fts3ExprParse(sqlite3_tokenizer *, +int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int, char **, int, int, int, const char *, int, Fts3Expr ** ); void sqlite3Fts3ExprFree(Fts3Expr *); @@ -507,6 +507,10 @@ int sqlite3Fts3ExprInitTestInterface(sqlite3 *db); int sqlite3Fts3InitTerm(sqlite3 *db); #endif +int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char 
*, int, + sqlite3_tokenizer_cursor ** +); + /* fts3_aux.c */ int sqlite3Fts3InitAux(sqlite3 *db); diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c index 1c3a79071c..a6e3492242 100644 --- a/ext/fts3/fts3_expr.c +++ b/ext/fts3/fts3_expr.c @@ -92,6 +92,7 @@ int sqlite3_fts3_enable_parentheses = 0; typedef struct ParseContext ParseContext; struct ParseContext { sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ + int iLangid; /* Language id used with tokenizer */ const char **azCol; /* Array of column names for fts3 table */ int bFts4; /* True to allow FTS4-only syntax */ int nCol; /* Number of entries in azCol[] */ @@ -127,6 +128,33 @@ static void *fts3MallocZero(int nByte){ return pRet; } +int sqlite3Fts3OpenTokenizer( + sqlite3_tokenizer *pTokenizer, + int iLangid, + const char *z, + int n, + sqlite3_tokenizer_cursor **ppCsr +){ + sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; + sqlite3_tokenizer_cursor *pCsr = 0; + int rc; + + rc = pModule->xOpen(pTokenizer, z, n, &pCsr); + assert( rc==SQLITE_OK || pCsr==0 ); + if( rc==SQLITE_OK ){ + pCsr->pTokenizer = pTokenizer; + if( pModule->iVersion>=1 ){ + rc = pModule->xLanguageid(pCsr, iLangid); + if( rc!=SQLITE_OK ){ + pModule->xClose(pCsr); + pCsr = 0; + } + } + } + *ppCsr = pCsr; + return rc; +} + /* ** Extract the next token from buffer z (length n) using the tokenizer @@ -154,15 +182,13 @@ static int getNextToken( Fts3Expr *pRet = 0; int nConsumed = 0; - rc = pModule->xOpen(pTokenizer, z, n, &pCursor); + rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor); if( rc==SQLITE_OK ){ const char *zToken; int nToken, iStart, iEnd, iPosition; int nByte; /* total space to allocate */ - pCursor->pTokenizer = pTokenizer; rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); - if( rc==SQLITE_OK ){ nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; pRet = (Fts3Expr *)fts3MallocZero(nByte); @@ -268,10 +294,10 @@ static int getNextString( ** appends buffer 
zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase ** structures. */ - rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor); + rc = sqlite3Fts3OpenTokenizer( + pTokenizer, pParse->iLangid, zInput, nInput, &pCursor); if( rc==SQLITE_OK ){ int ii; - pCursor->pTokenizer = pTokenizer; for(ii=0; rc==SQLITE_OK; ii++){ const char *zByte; int nByte, iBegin, iEnd, iPos; @@ -745,6 +771,7 @@ exprparse_out: */ int sqlite3Fts3ExprParse( sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ + int iLangid, /* Language id for tokenizer */ char **azCol, /* Array of column names for fts3 table */ int bFts4, /* True to allow FTS4-only syntax */ int nCol, /* Number of entries in azCol[] */ @@ -755,11 +782,13 @@ int sqlite3Fts3ExprParse( int nParsed; int rc; ParseContext sParse; + + memset(&sParse, 0, sizeof(ParseContext)); sParse.pTokenizer = pTokenizer; + sParse.iLangid = iLangid; sParse.azCol = (const char **)azCol; sParse.nCol = nCol; sParse.iDefaultCol = iDefaultCol; - sParse.nNest = 0; sParse.bFts4 = bFts4; if( z==0 ){ *ppExpr = 0; @@ -950,7 +979,7 @@ static void fts3ExprTest( } rc = sqlite3Fts3ExprParse( - pTokenizer, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr + pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr ); if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ sqlite3_result_error(context, "Error parsing expression", -1); diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c index 23ef25c5d4..fd5bc9786b 100644 --- a/ext/fts3/fts3_snippet.c +++ b/ext/fts3/fts3_snippet.c @@ -532,6 +532,7 @@ static int fts3StringAppend( */ static int fts3SnippetShift( Fts3Table *pTab, /* FTS3 table snippet comes from */ + int iLangid, /* Language id to use in tokenizing */ int nSnippet, /* Number of tokens desired for snippet */ const char *zDoc, /* Document text to extract snippet from */ int nDoc, /* Size of buffer zDoc in bytes */ @@ -567,11 +568,10 @@ static int fts3SnippetShift( /* Open a cursor on zDoc/nDoc. 
Check if there are (nSnippet+nDesired) ** or more tokens in zDoc/nDoc. */ - rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); + rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC); if( rc!=SQLITE_OK ){ return rc; } - pC->pTokenizer = pTab->pTokenizer; while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3; rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); @@ -631,11 +631,10 @@ static int fts3SnippetText( /* Open a token cursor on the document. */ pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; - rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); + rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC); if( rc!=SQLITE_OK ){ return rc; } - pC->pTokenizer = pTab->pTokenizer; while( rc==SQLITE_OK ){ int iBegin; /* Offset in zDoc of start of token */ @@ -657,7 +656,9 @@ static int fts3SnippetText( if( !isShiftDone ){ int n = nDoc - iBegin; - rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask); + rc = fts3SnippetShift( + pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask + ); isShiftDone = 1; /* Now that the shift has been done, check if the initial "..." are @@ -1390,9 +1391,10 @@ void sqlite3Fts3Offsets( } /* Initialize a tokenizer iterator to iterate through column iCol. */ - rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); + rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, + zDoc, nDoc, &pC + ); if( rc!=SQLITE_OK ) goto offsets_out; - pC->pTokenizer = pTab->pTokenizer; rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); while( rc==SQLITE_OK ){ diff --git a/ext/fts3/fts3_test.c b/ext/fts3/fts3_test.c index 72735f3d12..7cbc9eae9f 100644 --- a/ext/fts3/fts3_test.c +++ b/ext/fts3/fts3_test.c @@ -13,6 +13,9 @@ ** This file is not part of the production FTS code. It is only used for ** testing. 
It contains a Tcl command that can be used to test if a document ** matches an FTS NEAR expression. +** +** As of March 2012, it also contains a version 1 tokenizer used for testing +** that the sqlite3_tokenizer_module.xLanguage() method is invoked correctly. */ #include @@ -314,11 +317,207 @@ static int fts3_configure_incr_load_cmd( return TCL_OK; } +/************************************************************************** +** Beginning of test tokenizer code. +** +** For language 0, this tokenizer is similar to the default 'simple' +** tokenizer. For other languages L, the following: +** +** * Odd numbered languages are case-sensitive. Even numbered +** languages are not. +** +** * Language ids 100 or greater are considered an error. +** +** The implementation assumes that the input contains only ASCII characters +** (i.e. those that may be encoded in UTF-8 using a single byte). +*/ +typedef struct test_tokenizer { + sqlite3_tokenizer base; +} test_tokenizer; + +typedef struct test_tokenizer_cursor { + sqlite3_tokenizer_cursor base; + const char *aInput; /* Input being tokenized */ + int nInput; /* Size of the input in bytes */ + int iInput; /* Current offset in aInput */ + int iToken; /* Index of next token to be returned */ + char *aBuffer; /* Buffer containing current token */ + int nBuffer; /* Number of bytes allocated at pToken */ + int iLangid; /* Configured language id */ +} test_tokenizer_cursor; + +static int testTokenizerCreate( + int argc, const char * const *argv, + sqlite3_tokenizer **ppTokenizer +){ + test_tokenizer *pNew; + + pNew = sqlite3_malloc(sizeof(test_tokenizer)); + if( !pNew ) return SQLITE_NOMEM; + memset(pNew, 0, sizeof(test_tokenizer)); + + *ppTokenizer = (sqlite3_tokenizer *)pNew; + return SQLITE_OK; +} + +static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){ + test_tokenizer *p = (test_tokenizer *)pTokenizer; + sqlite3_free(p); + return SQLITE_OK; +} + +static int testTokenizerOpen( + sqlite3_tokenizer *pTokenizer, /* The 
tokenizer */ + const char *pInput, int nBytes, /* String to be tokenized */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ +){ + int rc = SQLITE_OK; /* Return code */ + test_tokenizer_cursor *pCsr; /* New cursor object */ + + UNUSED_PARAMETER(pTokenizer); + + pCsr = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor)); + if( pCsr==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pCsr, 0, sizeof(test_tokenizer_cursor)); + pCsr->aInput = pInput; + if( nBytes<0 ){ + pCsr->nInput = strlen(pInput); + }else{ + pCsr->nInput = nBytes; + } + } + + *ppCursor = (sqlite3_tokenizer_cursor *)pCsr; + return rc; +} + +static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){ + test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor; + sqlite3_free(pCsr->aBuffer); + sqlite3_free(pCsr); + return SQLITE_OK; +} + +static int testIsTokenChar(char c){ + return (c>='a' && c<='z') || (c>='A' && c<='Z'); +} +static int testTolower(char c){ + char ret = c; + if( ret>='A' && ret<='Z') ret = ret - ('A'-'a'); + return ret; +} + +static int testTokenizerNext( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by testTokenizerOpen */ + const char **ppToken, /* OUT: *ppToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ +){ + test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor; + int rc = SQLITE_OK; + const char *p; + const char *pEnd; + + p = &pCsr->aInput[pCsr->iInput]; + pEnd = &pCsr->aInput[pCsr->nInput]; + + /* Skip past any white-space */ + assert( p<=pEnd ); + while( p<pEnd && testIsTokenChar(*p)==0 ) p++; + + if( p==pEnd ){ + rc = SQLITE_DONE; + }else{ + /* Advance to the end of the token */ + const char *pToken = p; + int nToken; + while( p<pEnd && testIsTokenChar(*p) ) p++; + nToken = p-pToken; + + /* Copy the token into the buffer */ + if( nToken>pCsr->nBuffer ){ + sqlite3_free(pCsr->aBuffer); + pCsr->aBuffer = sqlite3_malloc(nToken); + } + if( pCsr->aBuffer==0 ){ + rc = SQLITE_NOMEM; + }else{ + int i; + + if( pCsr->iLangid & 0x00000001 ){ + for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i]; + }else{ + for(i=0; i<nToken; i++) pCsr->aBuffer[i] =
testTolower(pToken[i]); + } + pCsr->iToken++; + pCsr->iInput = p - pCsr->aInput; + + *ppToken = pCsr->aBuffer; + *pnBytes = nToken; + *piStartOffset = pToken - pCsr->aInput; + *piEndOffset = p - pCsr->aInput; + *piPosition = pCsr->iToken; + } + } + + return rc; +} + +static int testTokenizerLanguage( + sqlite3_tokenizer_cursor *pCursor, + int iLangid +){ + int rc = SQLITE_OK; + test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor; + pCsr->iLangid = iLangid; + if( pCsr->iLangid>=100 ){ + rc = SQLITE_ERROR; + } + return rc; +} + +static int fts3_test_tokenizer_cmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + static const sqlite3_tokenizer_module testTokenizerModule = { + 1, + testTokenizerCreate, + testTokenizerDestroy, + testTokenizerOpen, + testTokenizerClose, + testTokenizerNext, + testTokenizerLanguage + }; + const sqlite3_tokenizer_module *pPtr = &testTokenizerModule; + if( objc!=1 ){ + Tcl_WrongNumArgs(interp, 1, objv, ""); + return TCL_ERROR; + } + Tcl_SetObjResult(interp, Tcl_NewByteArrayObj( + (const unsigned char *)&pPtr, sizeof(sqlite3_tokenizer_module *) + )); + return TCL_OK; +} + +/* +** End of tokenizer code. 
+**************************************************************************/ + int Sqlitetestfts3_Init(Tcl_Interp *interp){ Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0); Tcl_CreateObjCommand(interp, "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0 ); + Tcl_CreateObjCommand( + interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0 + ); return TCL_OK; } #endif /* ifdef SQLITE_TEST */ diff --git a/ext/fts3/fts3_tokenizer.c b/ext/fts3/fts3_tokenizer.c index 6494bb96d8..f6b044ff6a 100644 --- a/ext/fts3/fts3_tokenizer.c +++ b/ext/fts3/fts3_tokenizer.c @@ -288,11 +288,10 @@ static void testFunc( goto finish; } pTokenizer->pModule = p; - if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){ + if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){ zErr = "error in xOpen()"; goto finish; } - pCsr->pTokenizer = pTokenizer; while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); diff --git a/ext/fts3/fts3_tokenizer.h b/ext/fts3/fts3_tokenizer.h index 615644506c..c91c7ed790 100644 --- a/ext/fts3/fts3_tokenizer.h +++ b/ext/fts3/fts3_tokenizer.h @@ -52,7 +52,7 @@ typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; struct sqlite3_tokenizer_module { /* - ** Structure version. Should always be set to 0. + ** Structure version. Should always be set to 0 or 1. */ int iVersion; @@ -133,6 +133,15 @@ struct sqlite3_tokenizer_module { int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */ int *piPosition /* OUT: Number of tokens returned before this one */ ); + + /*********************************************************************** + ** Methods below this point are only available if iVersion>=1. + */ + + /* + ** Configure the language id of a tokenizer cursor. 
+ */ + int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid); }; struct sqlite3_tokenizer { diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index decbe0b73a..8f97c8be98 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -657,6 +657,7 @@ static int fts3PendingTermsAddOne( */ static int fts3PendingTermsAdd( Fts3Table *p, /* Table into which text will be inserted */ + int iLangid, /* Language id to use */ const char *zText, /* Text of document to be inserted */ int iCol, /* Column into which text is being inserted */ u32 *pnWord /* OUT: Number of tokens inserted */ @@ -686,11 +687,10 @@ static int fts3PendingTermsAdd( return SQLITE_OK; } - rc = pModule->xOpen(pTokenizer, zText, -1, &pCsr); + rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr); if( rc!=SQLITE_OK ){ return rc; } - pCsr->pTokenizer = pTokenizer; xNext = pModule->xNext; while( SQLITE_OK==rc @@ -783,11 +783,16 @@ void sqlite3Fts3PendingTermsClear(Fts3Table *p){ ** Argument apVal is the same as the similarly named argument passed to ** fts3InsertData(). Parameter iDocid is the docid of the new row. 
*/ -static int fts3InsertTerms(Fts3Table *p, sqlite3_value **apVal, u32 *aSz){ +static int fts3InsertTerms( + Fts3Table *p, + int iLangid, + sqlite3_value **apVal, + u32 *aSz +){ int i; /* Iterator variable */ for(i=2; i<p->nColumn+2; i++){ const char *zText = (const char *)sqlite3_value_text(apVal[i]); - int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]); + int rc = fts3PendingTermsAdd(p, iLangid, zText, i-2, &aSz[i-2]); if( rc!=SQLITE_OK ){ return rc; } @@ -933,13 +938,11 @@ static void fts3DeleteTerms( if( rc==SQLITE_OK ){ if( SQLITE_ROW==sqlite3_step(pSelect) ){ int i; - rc = fts3PendingTermsDocid(p, - langidFromSelect(p, pSelect), - sqlite3_column_int64(pSelect, 0) - ); + int iLangid = langidFromSelect(p, pSelect); + rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0)); for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){ const char *zText = (const char *)sqlite3_column_text(pSelect, i); - rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]); + rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[i-1]); aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); } if( rc!=SQLITE_OK ){ @@ -3102,13 +3105,12 @@ static int fts3DoRebuild(Fts3Table *p){ while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ int iCol; - rc = fts3PendingTermsDocid(p, - langidFromSelect(p, pStmt), sqlite3_column_int64(pStmt, 0) - ); + int iLangid = langidFromSelect(p, pStmt); + rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0)); aSz[p->nColumn] = 0; for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){ const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); - rc = fts3PendingTermsAdd(p, z, iCol, &aSz[iCol]); + rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); } if( p->bHasDocsize ){ @@ -3227,14 +3229,13 @@ int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); sqlite3_tokenizer_cursor *pTC = 0; - rc =
pModule->xOpen(pT, zText, -1, &pTC); + rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); while( rc==SQLITE_OK ){ char const *zToken; /* Buffer containing token */ int nToken; /* Number of bytes in token */ int iDum1, iDum2; /* Dummy variables */ int iPos; /* Position of token in zText */ - pTC->pTokenizer = pT; rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ Fts3PhraseToken *pPT = pDef->pToken; @@ -3467,6 +3468,7 @@ int sqlite3Fts3UpdateMethod( /* If this is an INSERT or UPDATE operation, insert the new record. */ if( nArg>1 && rc==SQLITE_OK ){ + int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]); if( bInsertDone==0 ){ rc = fts3InsertData(p, apVal, pRowid); if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){ @@ -3474,14 +3476,11 @@ int sqlite3Fts3UpdateMethod( } } if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){ - rc = fts3PendingTermsDocid(p, - sqlite3_value_int(apVal[2 + p->nColumn + 2]), - *pRowid - ); + rc = fts3PendingTermsDocid(p, iLangid, *pRowid); } if( rc==SQLITE_OK ){ assert( p->iPrevDocid==*pRowid ); - rc = fts3InsertTerms(p, apVal, aSzIns); + rc = fts3InsertTerms(p, iLangid, apVal, aSzIns); } if( p->bHasDocsize ){ fts3InsertDocsize(&rc, p, aSzIns); diff --git a/manifest b/manifest index 15fcd20c9b..ef077935d3 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sproblems\swith\scombining\scontent=\sand\slanguageid=\sin\sa\ssingle\sfts4\stable. -D 2012-03-02T19:53:02.350 +C Add\sthe\sxLanguageid\smethod\sto\ssqlite3_fts3_tokenizer\sversions\s1\sand\sgreater. 
+D 2012-03-03T18:46:41.456 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 3f79a373e57c3b92dabf76f40b065e719d31ac34 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -63,22 +63,22 @@ F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c fd89caa4169520c32cf46ca5a62df6dd48201422 +F ext/fts3/fts3.c fcda9a9ff7ccfb9fe4388d36063e3405a652e15f F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h 521d300f2af4e741f53c4e2dd540275fb64533eb +F ext/fts3/fts3Int.h d1d7f964ddee067bcd16a6af4ba7ecf66220056d F ext/fts3/fts3_aux.c 72de4cb43db7bfc2f68fbda04b7d8095ae9a6239 -F ext/fts3/fts3_expr.c f5df26bddf46a5916b2a5f80c4027996e92b7b15 +F ext/fts3/fts3_expr.c dbc7ba4c3a6061adde0f38ed8e9b349568299551 F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914 F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec F ext/fts3/fts3_icu.c 6c8f395cdf9e1e3afa7fadb7e523dbbf381c6dfa F ext/fts3/fts3_porter.c b7e5276f9f0a5fc7018b6fa55ce0f31f269ef881 -F ext/fts3/fts3_snippet.c 1f9ee6a8e0e242649645968dcec4deb253d86c2a +F ext/fts3/fts3_snippet.c c9e126c20760988aa7c43c6ea1379db34738282e F ext/fts3/fts3_term.c d3466cf99432291be08e379d89645462431809d6 -F ext/fts3/fts3_test.c 24fa13f330db011500acb95590da9eee24951894 -F ext/fts3/fts3_tokenizer.c 9ff7ec66ae3c5c0340fa081958e64f395c71a106 -F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 +F ext/fts3/fts3_test.c a026412a41450a014ccb7abdd5efaa7c9711d49e +F ext/fts3/fts3_tokenizer.c 3da7254a9881f7e270ab28e2004e0d22b3212bce +F ext/fts3/fts3_tokenizer.h 66dec98e365854b6cd2d54f1a96bb6d428fc5a68 F ext/fts3/fts3_tokenizer1.c 0dde8f307b8045565cf63797ba9acfaff1c50c68 -F ext/fts3/fts3_write.c 
35b98a42f9bbdd28af1b1f3bb0c09ff07090a764 +F ext/fts3/fts3_write.c f87bb2d27d31cb7a7bf306747079095393c9d073 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 @@ -496,7 +496,7 @@ F test/fts3snippet.test 8e956051221a34c7daeb504f023cb54d5fa5a8b2 F test/fts3sort.test 95be0b19d7e41c44b29014f13ea8bddd495fd659 F test/fts4aa.test 6e7f90420b837b2c685f3bcbe84c868492d40a68 F test/fts4content.test 17b2360f7d1a9a7e5aa8022783f5c5731b6dfd4f -F test/fts4langid.test be989b5cddcd7596b87232af193f6c4560a34272 +F test/fts4langid.test fabdd5a8db0fa00292e0704809f566e3fb6dba3a F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f F test/func3.test 001021e5b88bd02a3b365a5c5fd8f6f49d39744a @@ -635,7 +635,7 @@ F test/pageropt.test 9191867ed19a2b3db6c42d1b36b6fbc657cd1ab0 F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test 065aa286e722ab24f2e51792c1f093bf60656b16 F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 -F test/permutations.test fa6f0e5f13fe0b1d3f7a7613179b7f7b20028184 +F test/permutations.test 2b5a1b64a8e5114757457fbce9010387d1fe7682 F test/pragma.test f6111ded4d56b79436a60a757d62f3c96a9cf3f5 F test/pragma2.test 3a55f82b954242c642f8342b17dffc8b47472947 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552 @@ -992,7 +992,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P 181bc35731f19c3e4497ba3338c209918d34ea69 -R acfb20f690a18ac8d67e116ae8c76f7d +P 22491e7bc38aee43819b888e04241cb6a6ef73a3 +R b3e8f4cc7fcd5d7bfa4510a8d2434f16 U dan -Z b771fbbc3f5dd49e39970a5f917fc8b7 +Z 8fb02bd9c9b9481fcea2986d10daaba1 diff --git a/manifest.uuid 
b/manifest.uuid index def5beae28..868a4cc932 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -22491e7bc38aee43819b888e04241cb6a6ef73a3 \ No newline at end of file +f8e9c445dd358c40e5a7bf3756b9f291909dbea7 \ No newline at end of file diff --git a/test/fts4langid.test b/test/fts4langid.test index b9110cb768..08f1a21c17 100644 --- a/test/fts4langid.test +++ b/test/fts4langid.test @@ -40,7 +40,6 @@ set ::testprefix fts4langid # 3.* - Tests with content= tables. Both where there is a real # underlying content table and where there is not. # -# # 4.* - Test that if one is provided, the tokenizer xLanguage method # is called to configure the tokenizer before tokenizing query # or document text. @@ -342,7 +341,45 @@ do_test_query1 3.3.4 {"zero one" OR "one two"} { or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] } +#------------------------------------------------------------------------- +# Test cases 4.* +# +proc build_multilingual_db_2 {db} { + $db eval { + CREATE VIRTUAL TABLE t4 USING fts4( + tokenize=testtokenizer, + languageid=lid + ); + } + for {set i 0} {$i < 50} {incr i} { + execsql { + INSERT INTO t4(docid, content, lid) VALUES($i, 'The Quick Brown Fox', $i) + } + } +} +do_test 4.1.0 { + reset_db + set ptr [fts3_test_tokenizer] + execsql { SELECT fts3_tokenizer('testtokenizer', $ptr) } + build_multilingual_db_2 db +} {} +do_execsql_test 4.1.1 { + SELECT docid FROM t4 WHERE t4 MATCH 'quick'; +} {0} +do_execsql_test 4.1.2 { + SELECT docid FROM t4 WHERE t4 MATCH 'quick' AND lid=1; +} {} +do_execsql_test 4.1.3 { + SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1; +} {1} +for {set i 0} {$i < 50} {incr i} { + do_execsql_test 4.1.4.$i { + SELECT count(*) FROM t4 WHERE t4 MATCH 'fox' AND lid=$i; + } [expr 0==($i%2)] +} +do_catchsql_test 4.1.5 { + INSERT INTO t4(content, lid) VALUES('hello world', 101) +} {1 {SQL logic error or missing database}} finish_test - diff --git a/test/permutations.test b/test/permutations.test index 
f55db1be73..26c1b2a514 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -184,8 +184,7 @@ test_suite "fts3" -prefix "" -description { fts3aux1.test fts3comp1.test fts3auto.test fts4aa.test fts4content.test fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test - fts3corrupt2.test - fts3first.test + fts3corrupt2.test fts3first.test fts4langid.test }