mirror of
https://github.com/sqlite/sqlite.git
synced 2025-07-27 20:41:58 +03:00
Changes to allow FTS4 tables to be created without the underlying %_docsize table (in order to save space).
FossilOrigin-Name: 31989b18f53d97eddfb39660ef04fbf9463583e0
This commit is contained in:
167
ext/fts3/fts3.c
167
ext/fts3/fts3.c
@ -595,6 +595,8 @@ static int fts3CreateTables(Fts3Table *p){
|
||||
"CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);",
|
||||
p->zDb, p->zName
|
||||
);
|
||||
}
|
||||
if( p->bHasStat ){
|
||||
fts3DbExec(&rc, db,
|
||||
"CREATE TABLE %Q.'%q_stat'(id INTEGER PRIMARY KEY, value BLOB);",
|
||||
p->zDb, p->zName
|
||||
@ -669,6 +671,36 @@ static void fts3DatabasePageSize(int *pRc, Fts3Table *p){
|
||||
}
|
||||
}
|
||||
|
||||
/*
** "Special" FTS4 arguments are column specifications of the following form:
**
**   <key> = <value>
**
** There may not be whitespace surrounding the "=" character. The <value>
** term may be quoted, but the <key> may not.
**
** This function scans argument z for the first '=' character and splits
** the argument at that point. It does not itself strip any whitespace.
**
** Return value and outputs:
**
**   * If z contains no '=' character, return 0. In this case neither
**     *pnKey nor *pzValue is written.
**
**   * Otherwise return 1. *pnKey is set to the number of bytes in z that
**     precede the '=' (the length of <key>), and *pzValue is set to a copy
**     of the text following the '=', obtained from sqlite3_mprintf() and
**     then passed through sqlite3Fts3Dequote(). If the allocation fails,
**     *pzValue is set to NULL while the function still returns 1, so the
**     caller must check *pzValue for NULL (out-of-memory).
**
** The caller owns the buffer returned via *pzValue and is responsible for
** releasing it with sqlite3_free().
*/
static int fts3IsSpecialColumn(
  const char *z,                  /* Argument text to examine */
  int *pnKey,                     /* OUT: Number of bytes in <key> */
  char **pzValue                  /* OUT: Dequoted copy of <value>, or NULL */
){
  char *zValue;                   /* Copy of the text following the '=' */
  const char *zCsr = z;           /* Cursor used to search for '=' */

  /* Advance to the first '='. Hitting the nul-terminator first means
  ** this argument is not of the <key>=<value> form. */
  while( *zCsr!='=' ){
    if( *zCsr=='\0' ) return 0;
    zCsr++;
  }

  /* Pointer difference has type ptrdiff_t; narrow it explicitly, in the
  ** same way the rest of this file casts strlen() results to int. */
  *pnKey = (int)(zCsr-z);

  zValue = sqlite3_mprintf("%s", &zCsr[1]);
  if( zValue ){
    sqlite3Fts3Dequote(zValue);
  }
  *pzValue = zValue;
  return 1;
}
|
||||
|
||||
/*
|
||||
** This function is the implementation of both the xConnect and xCreate
|
||||
** methods of the FTS3 virtual table.
|
||||
@ -690,8 +722,8 @@ static int fts3InitVtab(
|
||||
char **pzErr /* Write any error message here */
|
||||
){
|
||||
Fts3Hash *pHash = (Fts3Hash *)pAux;
|
||||
Fts3Table *p; /* Pointer to allocated vtab */
|
||||
int rc; /* Return code */
|
||||
Fts3Table *p = 0; /* Pointer to allocated vtab */
|
||||
int rc = SQLITE_OK; /* Return code */
|
||||
int i; /* Iterator variable */
|
||||
int nByte; /* Size of allocation used for *p */
|
||||
int iCol; /* Column index */
|
||||
@ -700,35 +732,90 @@ static int fts3InitVtab(
|
||||
char *zCsr; /* Space for holding column names */
|
||||
int nDb; /* Bytes required to hold database name */
|
||||
int nName; /* Bytes required to hold table name */
|
||||
|
||||
const char *zTokenizer = 0; /* Name of tokenizer to use */
|
||||
int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */
|
||||
int bNoDocsize = 0; /* True to omit %_docsize table */
|
||||
const char **aCol; /* Array of column names */
|
||||
sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */
|
||||
|
||||
assert( strlen(argv[0])==4 );
|
||||
assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
|
||||
|| (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4)
|
||||
);
|
||||
|
||||
nDb = (int)strlen(argv[1]) + 1;
|
||||
nName = (int)strlen(argv[2]) + 1;
|
||||
for(i=3; i<argc; i++){
|
||||
|
||||
aCol = (const char **)sqlite3_malloc(sizeof(const char *) * (argc-2) );
|
||||
if( !aCol ) return SQLITE_NOMEM;
|
||||
memset(aCol, 0, sizeof(const char *) * (argc-2));
|
||||
|
||||
/* Loop through all of the arguments passed by the user to the FTS3/4
|
||||
** module (i.e. all the column names and special arguments). This loop
|
||||
** does the following:
|
||||
**
|
||||
** + Figures out the number of columns the FTSX table will have, and
|
||||
** the number of bytes of space that must be allocated to store copies
|
||||
** of the column names.
|
||||
**
|
||||
** + If there is a tokenizer specification included in the arguments,
|
||||
** initializes the tokenizer pTokenizer.
|
||||
*/
|
||||
for(i=3; rc==SQLITE_OK && i<argc; i++){
|
||||
char const *z = argv[i];
|
||||
rc = sqlite3Fts3InitTokenizer(pHash, z, &pTokenizer, &zTokenizer, pzErr);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
int nKey;
|
||||
char *zVal;
|
||||
|
||||
/* Check if this is a tokenizer specification */
|
||||
if( !pTokenizer
|
||||
&& strlen(z)>8
|
||||
&& 0==sqlite3_strnicmp(z, "tokenize", 8)
|
||||
&& 0==sqlite3Fts3IsIdChar(z[8])
|
||||
){
|
||||
rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr);
|
||||
}
|
||||
if( z!=zTokenizer ){
|
||||
|
||||
/* Check if it is an FTS4 special argument. */
|
||||
else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){
|
||||
if( !zVal ){
|
||||
rc = SQLITE_NOMEM;
|
||||
goto fts3_init_out;
|
||||
}
|
||||
if( nKey==9 && 0==sqlite3_strnicmp(z, "matchinfo", 9) ){
|
||||
if( strlen(zVal)==4 && 0==sqlite3_strnicmp(zVal, "fts3", 4) ){
|
||||
bNoDocsize = 1;
|
||||
}else{
|
||||
*pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal);
|
||||
rc = SQLITE_ERROR;
|
||||
}
|
||||
}else{
|
||||
*pzErr = sqlite3_mprintf("unrecognized parameter: %s", z);
|
||||
rc = SQLITE_ERROR;
|
||||
}
|
||||
sqlite3_free(zVal);
|
||||
}
|
||||
|
||||
/* Otherwise, the argument is a column name. */
|
||||
else {
|
||||
nString += (int)(strlen(z) + 1);
|
||||
aCol[nCol++] = z;
|
||||
}
|
||||
}
|
||||
nCol = argc - 3 - (zTokenizer!=0);
|
||||
if( zTokenizer==0 ){
|
||||
rc = sqlite3Fts3InitTokenizer(pHash, 0, &pTokenizer, 0, pzErr);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
assert( pTokenizer );
|
||||
}
|
||||
if( rc!=SQLITE_OK ) goto fts3_init_out;
|
||||
|
||||
if( nCol==0 ){
|
||||
assert( nString==0 );
|
||||
aCol[0] = "content";
|
||||
nString = 8;
|
||||
nCol = 1;
|
||||
}
|
||||
|
||||
if( pTokenizer==0 ){
|
||||
rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr);
|
||||
if( rc!=SQLITE_OK ) goto fts3_init_out;
|
||||
}
|
||||
assert( pTokenizer );
|
||||
|
||||
|
||||
/* Allocate and populate the Fts3Table structure. */
|
||||
nByte = sizeof(Fts3Table) + /* Fts3Table */
|
||||
nCol * sizeof(char *) + /* azColumn */
|
||||
@ -741,7 +828,6 @@ static int fts3InitVtab(
|
||||
goto fts3_init_out;
|
||||
}
|
||||
memset(p, 0, nByte);
|
||||
|
||||
p->db = db;
|
||||
p->nColumn = nCol;
|
||||
p->nPendingData = 0;
|
||||
@ -749,11 +835,12 @@ static int fts3InitVtab(
|
||||
p->pTokenizer = pTokenizer;
|
||||
p->nNodeSize = 1000;
|
||||
p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
|
||||
zCsr = (char *)&p->azColumn[nCol];
|
||||
|
||||
p->bHasDocsize = (isFts4 && bNoDocsize==0);
|
||||
p->bHasStat = isFts4;
|
||||
fts3HashInit(&p->pendingTerms, FTS3_HASH_STRING, 1);
|
||||
|
||||
/* Fill in the zName and zDb fields of the vtab structure. */
|
||||
zCsr = (char *)&p->azColumn[nCol];
|
||||
p->zName = zCsr;
|
||||
memcpy(zCsr, argv[2], nName);
|
||||
zCsr += nName;
|
||||
@ -762,36 +849,23 @@ static int fts3InitVtab(
|
||||
zCsr += nDb;
|
||||
|
||||
/* Fill in the azColumn array */
|
||||
iCol = 0;
|
||||
for(i=3; i<argc; i++){
|
||||
if( argv[i]!=zTokenizer ){
|
||||
char *z;
|
||||
int n;
|
||||
z = (char *)sqlite3Fts3NextToken(argv[i], &n);
|
||||
memcpy(zCsr, z, n);
|
||||
zCsr[n] = '\0';
|
||||
sqlite3Fts3Dequote(zCsr);
|
||||
p->azColumn[iCol++] = zCsr;
|
||||
zCsr += n+1;
|
||||
assert( zCsr <= &((char *)p)[nByte] );
|
||||
}
|
||||
}
|
||||
if( iCol==0 ){
|
||||
assert( nCol==1 );
|
||||
p->azColumn[0] = "content";
|
||||
for(iCol=0; iCol<nCol; iCol++){
|
||||
char *z;
|
||||
int n;
|
||||
z = (char *)sqlite3Fts3NextToken(aCol[iCol], &n);
|
||||
memcpy(zCsr, z, n);
|
||||
zCsr[n] = '\0';
|
||||
sqlite3Fts3Dequote(zCsr);
|
||||
p->azColumn[iCol] = zCsr;
|
||||
zCsr += n+1;
|
||||
assert( zCsr <= &((char *)p)[nByte] );
|
||||
}
|
||||
|
||||
/* If this is an xCreate call, create the underlying tables in the
|
||||
** database. TODO: For xConnect(), it could verify that said tables exist.
|
||||
*/
|
||||
if( isCreate ){
|
||||
p->bHasContent = 1;
|
||||
p->bHasDocsize = argv[0][3]=='4';
|
||||
rc = fts3CreateTables(p);
|
||||
}else{
|
||||
rc = SQLITE_OK;
|
||||
fts3TableExists(&rc, db, argv[1], argv[2], "_content", &p->bHasContent);
|
||||
fts3TableExists(&rc, db, argv[1], argv[2], "_docsize", &p->bHasDocsize);
|
||||
}
|
||||
|
||||
/* Figure out the page-size for the database. This is required in order to
|
||||
@ -804,11 +878,12 @@ static int fts3InitVtab(
|
||||
fts3DeclareVtab(&rc, p);
|
||||
|
||||
fts3_init_out:
|
||||
assert( p || (pTokenizer && rc!=SQLITE_OK) );
|
||||
|
||||
sqlite3_free(aCol);
|
||||
if( rc!=SQLITE_OK ){
|
||||
if( p ){
|
||||
fts3DisconnectMethod((sqlite3_vtab *)p);
|
||||
}else{
|
||||
}else if( pTokenizer ){
|
||||
pTokenizer->pModule->xDestroy(pTokenizer);
|
||||
}
|
||||
}else{
|
||||
@ -3302,6 +3377,8 @@ static int fts3RenameMethod(
|
||||
"ALTER TABLE %Q.'%q_docsize' RENAME TO '%q_docsize';",
|
||||
p->zDb, p->zName, zName
|
||||
);
|
||||
}
|
||||
if( p->bHasStat ){
|
||||
fts3DbExec(&rc, db,
|
||||
"ALTER TABLE %Q.'%q_stat' RENAME TO '%q_stat';",
|
||||
p->zDb, p->zName, zName
|
||||
|
@ -131,7 +131,7 @@ struct Fts3Table {
|
||||
sqlite3_stmt *aStmt[24];
|
||||
|
||||
int nNodeSize; /* Soft limit for node size */
|
||||
u8 bHasContent; /* True if %_content table exists */
|
||||
u8 bHasStat; /* True if %_stat table exists */
|
||||
u8 bHasDocsize; /* True if %_docsize table exists */
|
||||
int nPgsz; /* Page size for host database */
|
||||
char *zSegmentsTbl; /* Name of %_segments table */
|
||||
@ -329,9 +329,10 @@ int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int);
|
||||
/* fts3_tokenizer.c */
|
||||
const char *sqlite3Fts3NextToken(const char *, int *);
|
||||
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
|
||||
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash,
|
||||
const char *, sqlite3_tokenizer **, const char **, char **
|
||||
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *,
|
||||
sqlite3_tokenizer **, char **
|
||||
);
|
||||
int sqlite3Fts3IsIdChar(char);
|
||||
|
||||
/* fts3_snippet.c */
|
||||
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
|
||||
|
@ -97,7 +97,7 @@ static void scalarFunc(
|
||||
sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
|
||||
}
|
||||
|
||||
static int fts3IsIdChar(char c){
|
||||
int sqlite3Fts3IsIdChar(char c){
|
||||
static const char isFtsIdChar[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
|
||||
@ -135,9 +135,9 @@ const char *sqlite3Fts3NextToken(const char *zStr, int *pn){
|
||||
break;
|
||||
|
||||
default:
|
||||
if( fts3IsIdChar(*z1) ){
|
||||
if( sqlite3Fts3IsIdChar(*z1) ){
|
||||
z2 = &z1[1];
|
||||
while( fts3IsIdChar(*z2) ) z2++;
|
||||
while( sqlite3Fts3IsIdChar(*z2) ) z2++;
|
||||
}else{
|
||||
z1++;
|
||||
}
|
||||
@ -150,9 +150,8 @@ const char *sqlite3Fts3NextToken(const char *zStr, int *pn){
|
||||
|
||||
int sqlite3Fts3InitTokenizer(
|
||||
Fts3Hash *pHash, /* Tokenizer hash table */
|
||||
const char *zArg, /* Possible tokenizer specification */
|
||||
const char *zArg, /* Tokenizer name */
|
||||
sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */
|
||||
const char **pzTokenizer, /* OUT: Set to zArg if is tokenizer */
|
||||
char **pzErr /* OUT: Set to malloced error message */
|
||||
){
|
||||
int rc;
|
||||
@ -162,26 +161,15 @@ int sqlite3Fts3InitTokenizer(
|
||||
char *zEnd; /* Pointer to nul-term of zCopy */
|
||||
sqlite3_tokenizer_module *m;
|
||||
|
||||
if( !z ){
|
||||
zCopy = sqlite3_mprintf("simple");
|
||||
}else{
|
||||
if( sqlite3_strnicmp(z, "tokenize", 8) || fts3IsIdChar(z[8])){
|
||||
return SQLITE_OK;
|
||||
}
|
||||
zCopy = sqlite3_mprintf("%s", &z[8]);
|
||||
*pzTokenizer = zArg;
|
||||
}
|
||||
if( !zCopy ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
|
||||
zCopy = sqlite3_mprintf("%s", zArg);
|
||||
if( !zCopy ) return SQLITE_NOMEM;
|
||||
zEnd = &zCopy[strlen(zCopy)];
|
||||
|
||||
z = (char *)sqlite3Fts3NextToken(zCopy, &n);
|
||||
z[n] = '\0';
|
||||
sqlite3Fts3Dequote(z);
|
||||
|
||||
m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, z, (int)strlen(z)+1);
|
||||
m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1);
|
||||
if( !m ){
|
||||
*pzErr = sqlite3_mprintf("unknown tokenizer: %s", z);
|
||||
rc = SQLITE_ERROR;
|
||||
|
@ -683,6 +683,8 @@ static int fts3DeleteAll(Fts3Table *p){
|
||||
fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0);
|
||||
if( p->bHasDocsize ){
|
||||
fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0);
|
||||
}
|
||||
if( p->bHasStat ){
|
||||
fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0);
|
||||
}
|
||||
return rc;
|
||||
@ -1035,7 +1037,7 @@ int sqlite3Fts3SegReaderCost(
|
||||
** for the segment is stored on the root page of the b-tree, then the cost
|
||||
** is zero. In this case all required data is already in main memory.
|
||||
*/
|
||||
if( p->bHasDocsize
|
||||
if( p->bHasStat
|
||||
&& !fts3SegReaderIsPending(pReader)
|
||||
&& !fts3SegReaderIsRootOnly(pReader)
|
||||
){
|
||||
@ -2855,8 +2857,8 @@ int sqlite3Fts3UpdateMethod(
|
||||
fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, apVal);
|
||||
if( p->bHasDocsize ){
|
||||
fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, apVal);
|
||||
nChng--;
|
||||
}
|
||||
nChng--;
|
||||
}
|
||||
}
|
||||
}else if( sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL ){
|
||||
@ -2874,12 +2876,12 @@ int sqlite3Fts3UpdateMethod(
|
||||
rc = fts3InsertTerms(p, apVal, aSzIns);
|
||||
}
|
||||
if( p->bHasDocsize ){
|
||||
nChng++;
|
||||
fts3InsertDocsize(&rc, p, aSzIns);
|
||||
}
|
||||
nChng++;
|
||||
}
|
||||
|
||||
if( p->bHasDocsize ){
|
||||
if( p->bHasStat ){
|
||||
fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user