1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-27 20:41:58 +03:00

Changes to allow FTS4 tables to be created without the underlying %_docsize table (in order to save space).

FossilOrigin-Name: 31989b18f53d97eddfb39660ef04fbf9463583e0
This commit is contained in:
dan
2010-11-02 17:41:52 +00:00
parent 07bf3918f9
commit af4c214e1f
10 changed files with 256 additions and 96 deletions

View File

@ -595,6 +595,8 @@ static int fts3CreateTables(Fts3Table *p){
"CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);",
p->zDb, p->zName
);
}
if( p->bHasStat ){
fts3DbExec(&rc, db,
"CREATE TABLE %Q.'%q_stat'(id INTEGER PRIMARY KEY, value BLOB);",
p->zDb, p->zName
@ -669,6 +671,36 @@ static void fts3DatabasePageSize(int *pRc, Fts3Table *p){
}
}
/*
** "Special" FTS4 arguments are column specifications of the following form:
**
**   <key> = <value>
**
** There may not be whitespace surrounding the "=" character. The <value>
** term may be quoted, but the <key> may not.
**
** Parameters:
**
**   z        Nul-terminated argument text to inspect.
**   pnKey    OUT: Number of bytes of z that precede the first '='.
**   pzValue  OUT: Copy of the text that follows the first '=', obtained
**            from sqlite3_mprintf() and then passed through
**            sqlite3Fts3Dequote(). Set to NULL if the allocation failed.
**
** If z contains no '=' character, return 0 without writing to either
** *pnKey or *pzValue. Otherwise write both outputs and return 1. Note that
** 1 is returned even when the allocation fails, so the caller must check
** *pzValue for NULL. A non-NULL *pzValue belongs to the caller, who is
** responsible for releasing it with sqlite3_free().
*/
static int fts3IsSpecialColumn(
  const char *z,
  int *pnKey,
  char **pzValue
){
  char *zValue;
  const char *zCsr = z;

  /* Advance to the first '='. Reaching the nul-terminator first means
  ** this argument is not of the <key>=<value> form. */
  while( *zCsr!='=' ){
    if( *zCsr=='\0' ) return 0;
    zCsr++;
  }

  /* The pointer difference has type ptrdiff_t. Narrow it to int explicitly
  ** rather than relying on an implicit conversion. */
  *pnKey = (int)(zCsr-z);

  /* Copy everything after the '=' and strip any quoting in place. */
  zValue = sqlite3_mprintf("%s", &zCsr[1]);
  if( zValue ){
    sqlite3Fts3Dequote(zValue);
  }
  *pzValue = zValue;
  return 1;
}
/*
** This function is the implementation of both the xConnect and xCreate
** methods of the FTS3 virtual table.
@ -690,8 +722,8 @@ static int fts3InitVtab(
char **pzErr /* Write any error message here */
){
Fts3Hash *pHash = (Fts3Hash *)pAux;
Fts3Table *p; /* Pointer to allocated vtab */
int rc; /* Return code */
Fts3Table *p = 0; /* Pointer to allocated vtab */
int rc = SQLITE_OK; /* Return code */
int i; /* Iterator variable */
int nByte; /* Size of allocation used for *p */
int iCol; /* Column index */
@ -700,35 +732,90 @@ static int fts3InitVtab(
char *zCsr; /* Space for holding column names */
int nDb; /* Bytes required to hold database name */
int nName; /* Bytes required to hold table name */
const char *zTokenizer = 0; /* Name of tokenizer to use */
int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */
int bNoDocsize = 0; /* True to omit %_docsize table */
const char **aCol; /* Array of column names */
sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */
assert( strlen(argv[0])==4 );
assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
|| (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4)
);
nDb = (int)strlen(argv[1]) + 1;
nName = (int)strlen(argv[2]) + 1;
for(i=3; i<argc; i++){
aCol = (const char **)sqlite3_malloc(sizeof(const char *) * (argc-2) );
if( !aCol ) return SQLITE_NOMEM;
memset(aCol, 0, sizeof(const char *) * (argc-2));
/* Loop through all of the arguments passed by the user to the FTS3/4
** module (i.e. all the column names and special arguments). This loop
** does the following:
**
** + Figures out the number of columns the FTSX table will have, and
** the number of bytes of space that must be allocated to store copies
** of the column names.
**
** + If there is a tokenizer specification included in the arguments,
** initializes the tokenizer pTokenizer.
*/
for(i=3; rc==SQLITE_OK && i<argc; i++){
char const *z = argv[i];
rc = sqlite3Fts3InitTokenizer(pHash, z, &pTokenizer, &zTokenizer, pzErr);
if( rc!=SQLITE_OK ){
return rc;
int nKey;
char *zVal;
/* Check if this is a tokenizer specification */
if( !pTokenizer
&& strlen(z)>8
&& 0==sqlite3_strnicmp(z, "tokenize", 8)
&& 0==sqlite3Fts3IsIdChar(z[8])
){
rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr);
}
if( z!=zTokenizer ){
/* Check if it is an FTS4 special argument. */
else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){
if( !zVal ){
rc = SQLITE_NOMEM;
goto fts3_init_out;
}
if( nKey==9 && 0==sqlite3_strnicmp(z, "matchinfo", 9) ){
if( strlen(zVal)==4 && 0==sqlite3_strnicmp(zVal, "fts3", 4) ){
bNoDocsize = 1;
}else{
*pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal);
rc = SQLITE_ERROR;
}
}else{
*pzErr = sqlite3_mprintf("unrecognized parameter: %s", z);
rc = SQLITE_ERROR;
}
sqlite3_free(zVal);
}
/* Otherwise, the argument is a column name. */
else {
nString += (int)(strlen(z) + 1);
aCol[nCol++] = z;
}
}
nCol = argc - 3 - (zTokenizer!=0);
if( zTokenizer==0 ){
rc = sqlite3Fts3InitTokenizer(pHash, 0, &pTokenizer, 0, pzErr);
if( rc!=SQLITE_OK ){
return rc;
}
assert( pTokenizer );
}
if( rc!=SQLITE_OK ) goto fts3_init_out;
if( nCol==0 ){
assert( nString==0 );
aCol[0] = "content";
nString = 8;
nCol = 1;
}
if( pTokenizer==0 ){
rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr);
if( rc!=SQLITE_OK ) goto fts3_init_out;
}
assert( pTokenizer );
/* Allocate and populate the Fts3Table structure. */
nByte = sizeof(Fts3Table) + /* Fts3Table */
nCol * sizeof(char *) + /* azColumn */
@ -741,7 +828,6 @@ static int fts3InitVtab(
goto fts3_init_out;
}
memset(p, 0, nByte);
p->db = db;
p->nColumn = nCol;
p->nPendingData = 0;
@ -749,11 +835,12 @@ static int fts3InitVtab(
p->pTokenizer = pTokenizer;
p->nNodeSize = 1000;
p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
zCsr = (char *)&p->azColumn[nCol];
p->bHasDocsize = (isFts4 && bNoDocsize==0);
p->bHasStat = isFts4;
fts3HashInit(&p->pendingTerms, FTS3_HASH_STRING, 1);
/* Fill in the zName and zDb fields of the vtab structure. */
zCsr = (char *)&p->azColumn[nCol];
p->zName = zCsr;
memcpy(zCsr, argv[2], nName);
zCsr += nName;
@ -762,36 +849,23 @@ static int fts3InitVtab(
zCsr += nDb;
/* Fill in the azColumn array */
iCol = 0;
for(i=3; i<argc; i++){
if( argv[i]!=zTokenizer ){
char *z;
int n;
z = (char *)sqlite3Fts3NextToken(argv[i], &n);
memcpy(zCsr, z, n);
zCsr[n] = '\0';
sqlite3Fts3Dequote(zCsr);
p->azColumn[iCol++] = zCsr;
zCsr += n+1;
assert( zCsr <= &((char *)p)[nByte] );
}
}
if( iCol==0 ){
assert( nCol==1 );
p->azColumn[0] = "content";
for(iCol=0; iCol<nCol; iCol++){
char *z;
int n;
z = (char *)sqlite3Fts3NextToken(aCol[iCol], &n);
memcpy(zCsr, z, n);
zCsr[n] = '\0';
sqlite3Fts3Dequote(zCsr);
p->azColumn[iCol] = zCsr;
zCsr += n+1;
assert( zCsr <= &((char *)p)[nByte] );
}
/* If this is an xCreate call, create the underlying tables in the
** database. TODO: For xConnect(), it could verify that said tables exist.
*/
if( isCreate ){
p->bHasContent = 1;
p->bHasDocsize = argv[0][3]=='4';
rc = fts3CreateTables(p);
}else{
rc = SQLITE_OK;
fts3TableExists(&rc, db, argv[1], argv[2], "_content", &p->bHasContent);
fts3TableExists(&rc, db, argv[1], argv[2], "_docsize", &p->bHasDocsize);
}
/* Figure out the page-size for the database. This is required in order to
@ -804,11 +878,12 @@ static int fts3InitVtab(
fts3DeclareVtab(&rc, p);
fts3_init_out:
assert( p || (pTokenizer && rc!=SQLITE_OK) );
sqlite3_free(aCol);
if( rc!=SQLITE_OK ){
if( p ){
fts3DisconnectMethod((sqlite3_vtab *)p);
}else{
}else if( pTokenizer ){
pTokenizer->pModule->xDestroy(pTokenizer);
}
}else{
@ -3302,6 +3377,8 @@ static int fts3RenameMethod(
"ALTER TABLE %Q.'%q_docsize' RENAME TO '%q_docsize';",
p->zDb, p->zName, zName
);
}
if( p->bHasStat ){
fts3DbExec(&rc, db,
"ALTER TABLE %Q.'%q_stat' RENAME TO '%q_stat';",
p->zDb, p->zName, zName

View File

@ -131,7 +131,7 @@ struct Fts3Table {
sqlite3_stmt *aStmt[24];
int nNodeSize; /* Soft limit for node size */
u8 bHasContent; /* True if %_content table exists */
u8 bHasStat; /* True if %_stat table exists */
u8 bHasDocsize; /* True if %_docsize table exists */
int nPgsz; /* Page size for host database */
char *zSegmentsTbl; /* Name of %_segments table */
@ -329,9 +329,10 @@ int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int);
/* fts3_tokenizer.c */
const char *sqlite3Fts3NextToken(const char *, int *);
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash,
const char *, sqlite3_tokenizer **, const char **, char **
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *,
sqlite3_tokenizer **, char **
);
int sqlite3Fts3IsIdChar(char);
/* fts3_snippet.c */
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);

View File

@ -97,7 +97,7 @@ static void scalarFunc(
sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
}
static int fts3IsIdChar(char c){
int sqlite3Fts3IsIdChar(char c){
static const char isFtsIdChar[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
@ -135,9 +135,9 @@ const char *sqlite3Fts3NextToken(const char *zStr, int *pn){
break;
default:
if( fts3IsIdChar(*z1) ){
if( sqlite3Fts3IsIdChar(*z1) ){
z2 = &z1[1];
while( fts3IsIdChar(*z2) ) z2++;
while( sqlite3Fts3IsIdChar(*z2) ) z2++;
}else{
z1++;
}
@ -150,9 +150,8 @@ const char *sqlite3Fts3NextToken(const char *zStr, int *pn){
int sqlite3Fts3InitTokenizer(
Fts3Hash *pHash, /* Tokenizer hash table */
const char *zArg, /* Possible tokenizer specification */
const char *zArg, /* Tokenizer name */
sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */
const char **pzTokenizer, /* OUT: Set to zArg if is tokenizer */
char **pzErr /* OUT: Set to malloced error message */
){
int rc;
@ -162,26 +161,15 @@ int sqlite3Fts3InitTokenizer(
char *zEnd; /* Pointer to nul-term of zCopy */
sqlite3_tokenizer_module *m;
if( !z ){
zCopy = sqlite3_mprintf("simple");
}else{
if( sqlite3_strnicmp(z, "tokenize", 8) || fts3IsIdChar(z[8])){
return SQLITE_OK;
}
zCopy = sqlite3_mprintf("%s", &z[8]);
*pzTokenizer = zArg;
}
if( !zCopy ){
return SQLITE_NOMEM;
}
zCopy = sqlite3_mprintf("%s", zArg);
if( !zCopy ) return SQLITE_NOMEM;
zEnd = &zCopy[strlen(zCopy)];
z = (char *)sqlite3Fts3NextToken(zCopy, &n);
z[n] = '\0';
sqlite3Fts3Dequote(z);
m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, z, (int)strlen(z)+1);
m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1);
if( !m ){
*pzErr = sqlite3_mprintf("unknown tokenizer: %s", z);
rc = SQLITE_ERROR;

View File

@ -683,6 +683,8 @@ static int fts3DeleteAll(Fts3Table *p){
fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0);
if( p->bHasDocsize ){
fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0);
}
if( p->bHasStat ){
fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0);
}
return rc;
@ -1035,7 +1037,7 @@ int sqlite3Fts3SegReaderCost(
** for the segment is stored on the root page of the b-tree, then the cost
** is zero. In this case all required data is already in main memory.
*/
if( p->bHasDocsize
if( p->bHasStat
&& !fts3SegReaderIsPending(pReader)
&& !fts3SegReaderIsRootOnly(pReader)
){
@ -2855,8 +2857,8 @@ int sqlite3Fts3UpdateMethod(
fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, apVal);
if( p->bHasDocsize ){
fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, apVal);
nChng--;
}
nChng--;
}
}
}else if( sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL ){
@ -2874,12 +2876,12 @@ int sqlite3Fts3UpdateMethod(
rc = fts3InsertTerms(p, apVal, aSzIns);
}
if( p->bHasDocsize ){
nChng++;
fts3InsertDocsize(&rc, p, aSzIns);
}
nChng++;
}
if( p->bHasDocsize ){
if( p->bHasStat ){
fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng);
}