1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Updates to snippet() and offsets() functions of FTS3 so that they work

sanely following an OOM fault.

FossilOrigin-Name: b939a37a8ce296785a300e79ab9d3d87ad91343f
This commit is contained in:
drh
2009-11-28 21:33:21 +00:00
parent 9287d93c17
commit 0a62730d3f
4 changed files with 139 additions and 158 deletions

View File

@ -46,144 +46,94 @@ struct Snippet {
** hi-bit-set characters. This is the same solution used in the
** tokenizer.
*/
/* TODO(shess) The snippet-generation code should be using the
** tokenizer-generated tokens rather than doing its own local
** tokenization.
*/
/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
static int safe_isspace(char c){
static int fts3snippetIsspace(char c){
return (c&0x80)==0 ? isspace(c) : 0;
}
/*
** ASCII-only alphanumeric test.  A byte with its high bit set is never
** reported as alphanumeric; every other byte is handed to isalnum().
*/
static int safe_isalnum(char c){
  if( (c&0x80)!=0 ){
    return 0;
  }
  return isalnum(c);
}
/*******************************************************************/
/* DataBuffer is used to collect data into a buffer in piecemeal
** fashion. It implements the usual distinction between amount of
** data currently stored (nData) and buffer capacity (nCapacity).
**
** dataBufferInit - create a buffer with given initial capacity.
** dataBufferReset - forget buffer's data, retaining capacity.
** dataBufferSwap - swap contents of two buffers.
** dataBufferExpand - expand capacity without adding data.
** dataBufferAppend - append data.
** dataBufferAppend2 - append two pieces of data at once.
** dataBufferReplace - replace buffer's data.
/*
** A StringBuffer object holds a zero-terminated string that grows
** arbitrarily by appending. Space to hold the string is obtained
** from sqlite3_malloc(). After any memory allocation failure,
** StringBuffer.z is set to NULL and no further allocation is attempted.
*/
/*
** Byte buffer filled piecemeal.  nData is the amount of data currently
** stored and nCapacity is the size of the storage that pData points to.
*/
typedef struct DataBuffer DataBuffer;
struct DataBuffer {
  char *pData;      /* Pointer to malloc'ed buffer. */
  int nCapacity;    /* Size of pData buffer. */
  int nData;        /* End of data loaded into pData. */
};
/*
** Initialize pBuffer as an empty buffer with room for nCapacity bytes.
** A capacity of zero performs no allocation.
**
** If the allocation fails the buffer is left empty with a capacity of
** zero, so that the recorded capacity never exceeds the storage that is
** actually held.  (Previously nCapacity stayed non-zero alongside a NULL
** pData, which let later appends skip reallocation and write through NULL.)
*/
static void dataBufferInit(DataBuffer *pBuffer, int nCapacity){
  assert( nCapacity>=0 );
  pBuffer->nData = 0;
  pBuffer->pData = nCapacity==0 ? NULL : sqlite3_malloc(nCapacity);
  pBuffer->nCapacity = pBuffer->pData ? nCapacity : 0;
}
/* Forget the buffer's data by zeroing nData; pData and nCapacity are
** left untouched, so the existing storage is retained.
*/
static void dataBufferReset(DataBuffer *pBuffer){
pBuffer->nData = 0;
}
/*
** Make sure pBuffer can hold nAddCapacity more bytes beyond the nData
** bytes already stored, growing the allocation when necessary.  No data
** is added.
**
** On allocation failure the old storage is released and the buffer is
** reset to the empty, zero-capacity state (pData==NULL).  The result of
** sqlite3_realloc() is held in a temporary because a failed realloc does
** not free the original block; assigning it straight to pData would leak
** that block.  nCapacity is only raised once the new storage exists.
*/
static void dataBufferExpand(DataBuffer *pBuffer, int nAddCapacity){
  assert( nAddCapacity>0 );
  /* TODO(shess) Consider expanding more aggressively.  Note that the
  ** underlying malloc implementation may take care of such things for
  ** us already.
  */
  if( pBuffer->nData+nAddCapacity>pBuffer->nCapacity ){
    int nNew = pBuffer->nData+nAddCapacity;
    char *pNew = sqlite3_realloc(pBuffer->pData, nNew);
    if( pNew==0 ){
      sqlite3_free(pBuffer->pData);
      pBuffer->pData = 0;
      pBuffer->nCapacity = 0;
      pBuffer->nData = 0;
      return;
    }
    pBuffer->pData = pNew;
    pBuffer->nCapacity = nNew;
  }
}
/*
** Append nSource bytes from pSource to the end of pBuffer, growing the
** buffer first if required.  nSource must be positive and pSource
** non-NULL.
**
** If no storage is available after the expansion step (allocation
** failure), nothing is copied and the buffer is left empty with zero
** capacity rather than writing through a NULL pointer.  nData is cleared
** in that case so that a stale length can never be paired with storage
** obtained by a later, successful allocation.
*/
static void dataBufferAppend(DataBuffer *pBuffer,
                             const char *pSource, int nSource){
  assert( nSource>0 && pSource!=NULL );
  dataBufferExpand(pBuffer, nSource);
  if( pBuffer->pData==NULL ){
    pBuffer->nData = 0;
    pBuffer->nCapacity = 0;
    return;
  }
  memcpy(pBuffer->pData+pBuffer->nData, pSource, nSource);
  pBuffer->nData += nSource;
}
/*
** Append two pieces of data to pBuffer in one step: nSource1 bytes from
** pSource1 followed immediately by nSource2 bytes from pSource2.  Both
** lengths must be positive and both pointers non-NULL.  The buffer is
** grown once for the combined size.
**
** If no storage is available after the expansion step (allocation
** failure), nothing is copied and the buffer is left empty with zero
** capacity rather than writing through a NULL pointer.  nData is cleared
** in that case so that a stale length can never be paired with storage
** obtained by a later, successful allocation.
*/
static void dataBufferAppend2(DataBuffer *pBuffer,
                              const char *pSource1, int nSource1,
                              const char *pSource2, int nSource2){
  assert( nSource1>0 && pSource1!=NULL );
  assert( nSource2>0 && pSource2!=NULL );
  dataBufferExpand(pBuffer, nSource1+nSource2);
  if( pBuffer->pData==NULL ){
    pBuffer->nData = 0;
    pBuffer->nCapacity = 0;
    return;
  }
  memcpy(pBuffer->pData+pBuffer->nData, pSource1, nSource1);
  memcpy(pBuffer->pData+pBuffer->nData+nSource1, pSource2, nSource2);
  pBuffer->nData += nSource1+nSource2;
}
/* Replace the buffer's data with nSource bytes from pSource: the stored
** data is first reset and the new bytes are then appended.
*/
static void dataBufferReplace(DataBuffer *pBuffer,
const char *pSource, int nSource){
dataBufferReset(pBuffer);
dataBufferAppend(pBuffer, pSource, nSource);
}
/* StringBuffer is a null-terminated version of DataBuffer. */
typedef struct StringBuffer {
DataBuffer b; /* Includes null terminator. */
char *z; /* Text of the string. Space from malloc. */
int nUsed; /* Number bytes of z[] used, not counting \000 terminator */
int nAlloc; /* Bytes allocated for z[] */
} StringBuffer;
/*
** Prepare sb for use: give the underlying DataBuffer an initial capacity
** of 100 bytes and store the empty string in it.  The one byte stored is
** the string terminator.
*/
static void initStringBuffer(StringBuffer *sb){
  DataBuffer *pBuf = &sb->b;

  dataBufferInit(pBuf, 100);
  dataBufferReplace(pBuf, "", 1);
}
/* Return the length of the string held in sb.  The underlying buffer's
** nData counts the terminator as well, hence the subtraction of one.
*/
static int stringBufferLength(StringBuffer *sb){
return sb->b.nData-1;
}
static char *stringBufferData(StringBuffer *sb){
return sb->b.pData;
/*
** Initialize a new StringBuffer.
*/
static void fts3SnippetSbInit(StringBuffer *p){
p->nAlloc = 100;
p->nUsed = 0;
p->z = sqlite3_malloc( p->nAlloc );
}
/*
** Append the first nFrom bytes of zFrom to the string in sb.  A count of
** zero or less leaves the string unchanged.
*/
static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
  assert( sb->b.nData>0 );
  if( nFrom<=0 ) return;

  /* Step back over the existing terminator so the new text overwrites
  ** it, then add the text and a fresh terminator in a single append. */
  sb->b.nData--;
  dataBufferAppend2(&sb->b, zFrom, nFrom, "", 1);
}
/* Append the zero-terminated string zFrom to sb; its length is taken
** with strlen() and the work is delegated to nappend().
*/
static void append(StringBuffer *sb, const char *zFrom){
nappend(sb, zFrom, strlen(zFrom));
}
/*
** Append text to the string buffer.
*/
static void fts3SnippetAppend(StringBuffer *p, const char *zNew, int nNew){
if( p->z==0 ) return;
if( nNew<0 ) nNew = strlen(zNew);
if( p->nUsed + nNew >= p->nAlloc ){
int nAlloc;
char *zNew;
static int endsInWhiteSpace(StringBuffer *p){
return stringBufferLength(p)>0 &&
safe_isspace(stringBufferData(p)[stringBufferLength(p)-1]);
nAlloc = p->nUsed + nNew + p->nAlloc;
zNew = sqlite3_realloc(p->z, nAlloc);
if( zNew==0 ){
sqlite3_free(p->z);
p->z = 0;
return;
}
p->z = zNew;
p->nAlloc = nAlloc;
}
memcpy(&p->z[p->nUsed], zNew, nNew);
p->nUsed += nNew;
p->z[p->nUsed] = 0;
}
/* If the StringBuffer ends in something other than white space, add a
** single space character to the end.
*/
static void appendWhiteSpace(StringBuffer *p){
if( stringBufferLength(p)==0 ) return;
if( !endsInWhiteSpace(p) ) append(p, " ");
static void fts3SnippetAppendWhiteSpace(StringBuffer *p){
if( p->z && p->nUsed && !fts3snippetIsspace(p->z[p->nUsed-1]) ){
fts3SnippetAppend(p, " ", 1);
}
}
/* Remove white space from the end of the StringBuffer */
static void trimWhiteSpace(StringBuffer *p){
while( endsInWhiteSpace(p) ){
p->b.pData[--p->b.nData-1] = '\0';
static void fts3SnippetTrimWhiteSpace(StringBuffer *p){
if( p->z ){
while( p->nUsed && fts3snippetIsspace(p->z[p->nUsed-1]) ){
p->nUsed--;
}
p->z[p->nUsed] = 0;
}
}
/*
** Free a Snippet object: its aMatch[] array, its zOffset and zSnippet
** strings, and finally the structure itself.  Passing a NULL pointer
** is a harmless no-op.
*/
static void fts3SnippetFree(Snippet *p){
  if( p==0 ) return;
  sqlite3_free(p->zSnippet);
  sqlite3_free(p->zOffset);
  sqlite3_free(p->aMatch);
  sqlite3_free(p);
}
/*
** Append a single entry to the p->aMatch[] log.
*/
static void snippetAppendMatch(
static int snippetAppendMatch(
Snippet *p, /* Append the entry to this snippet */
int iCol, int iTerm, /* The column and query term */
int iToken, /* Matching token in document */
@ -192,13 +142,16 @@ static void snippetAppendMatch(
int i;
struct snippetMatch *pMatch;
if( p->nMatch+1>=p->nAlloc ){
struct snippetMatch *pNew;
p->nAlloc = p->nAlloc*2 + 10;
p->aMatch = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
if( p->aMatch==0 ){
pNew = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
if( pNew==0 ){
p->aMatch = 0;
p->nMatch = 0;
p->nAlloc = 0;
return;
return SQLITE_NOMEM;
}
p->aMatch = pNew;
}
i = p->nMatch++;
pMatch = &p->aMatch[i];
@ -207,6 +160,7 @@ static void snippetAppendMatch(
pMatch->iToken = iToken;
pMatch->iStart = iStart;
pMatch->nByte = nByte;
return SQLITE_OK;
}
/*
@ -280,7 +234,7 @@ static int fts3ExprBeneathNot(Fts3Expr *p){
** Add entries to pSnippet->aMatch[] for every match that occurs against
** document zDoc[0..nDoc-1] which is stored in column iColumn.
*/
static void snippetOffsetsOfColumn(
static int snippetOffsetsOfColumn(
Fts3Cursor *pCur, /* The fulltext search cursor */
Snippet *pSnippet, /* The Snippet object to be filled in */
int iColumn, /* Index of fulltext table column */
@ -310,11 +264,12 @@ static void snippetOffsetsOfColumn(
pTokenizer = pVtab->pTokenizer;
pTModule = pTokenizer->pModule;
rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
if( rc ) return;
if( rc ) return rc;
pTCursor->pTokenizer = pTokenizer;
prevMatch = 0;
while( !pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos) ){
while( (rc = pTModule->xNext(pTCursor, &zToken, &nToken,
&iBegin, &iEnd, &iPos))==SQLITE_OK ){
Fts3Expr *pIter = pCur->pExpr;
int iIter = -1;
iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
@ -339,15 +294,18 @@ static void snippetOffsetsOfColumn(
if( i==(FTS3_ROTOR_SZ-2) || nPhrase==iIter+1 ){
for(j=nPhrase-1; j>=0; j--){
int k = (iRotor-j) & FTS3_ROTOR_MASK;
snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j,
rc = snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j,
iRotorBegin[k], iRotorLen[k]);
if( rc ) goto end_offsets_of_column;
}
}
}
prevMatch = match<<1;
iRotor++;
}
end_offsets_of_column:
pTModule->xClose(pTCursor);
return rc==SQLITE_DONE ? SQLITE_OK : rc;
}
/*
@ -489,6 +447,7 @@ static int snippetAllOffsets(Fts3Cursor *pCsr, Snippet **ppSnippet){
int iFirst, iLast;
int iTerm = 0;
Snippet *pSnippet;
int rc = SQLITE_OK;
if( pCsr->pExpr==0 ){
return SQLITE_OK;
@ -512,19 +471,23 @@ static int snippetAllOffsets(Fts3Cursor *pCsr, Snippet **ppSnippet){
iFirst = iColumn;
iLast = iColumn;
}
for(i=iFirst; i<=iLast; i++){
for(i=iFirst; rc==SQLITE_OK && i<=iLast; i++){
const char *zDoc;
int nDoc;
zDoc = (const char*)sqlite3_column_text(pCsr->pStmt, i+1);
nDoc = sqlite3_column_bytes(pCsr->pStmt, i+1);
snippetOffsetsOfColumn(pCsr, pSnippet, i, zDoc, nDoc);
if( zDoc==0 && sqlite3_column_type(pCsr->pStmt, i+1)!=SQLITE_NULL ){
rc = SQLITE_NOMEM;
}else{
rc = snippetOffsetsOfColumn(pCsr, pSnippet, i, zDoc, nDoc);
}
}
while( trimSnippetOffsets(pCsr->pExpr, pSnippet, &iTerm) ){
iTerm = 0;
}
return SQLITE_OK;
return rc;
}
/*
@ -538,7 +501,7 @@ static void snippetOffsetText(Snippet *p){
StringBuffer sb;
char zBuf[200];
if( p->zOffset ) return;
initStringBuffer(&sb);
fts3SnippetSbInit(&sb);
for(i=0; i<p->nMatch; i++){
struct snippetMatch *pMatch = &p->aMatch[i];
if( pMatch->iTerm>=0 ){
@ -550,12 +513,12 @@ static void snippetOffsetText(Snippet *p){
zBuf[0] = ' ';
sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
append(&sb, zBuf);
fts3SnippetAppend(&sb, zBuf, -1);
cnt++;
}
}
p->zOffset = stringBufferData(&sb);
p->nOffset = stringBufferLength(&sb);
p->zOffset = sb.z;
p->nOffset = sb.z ? sb.nUsed : 0;
}
/*
@ -593,10 +556,10 @@ static int wordBoundary(
}
}
for(i=1; i<=10; i++){
if( safe_isspace(zDoc[iBreak-i]) ){
if( fts3snippetIsspace(zDoc[iBreak-i]) ){
return iBreak - i + 1;
}
if( safe_isspace(zDoc[iBreak+i]) ){
if( fts3snippetIsspace(zDoc[iBreak+i]) ){
return iBreak + i + 1;
}
}
@ -640,7 +603,7 @@ static void snippetText(
pSnippet->zSnippet = 0;
aMatch = pSnippet->aMatch;
nMatch = pSnippet->nMatch;
initStringBuffer(&sb);
fts3SnippetSbInit(&sb);
for(i=0; i<nMatch; i++){
aMatch[i].snStatus = SNIPPET_IGNORE;
@ -674,10 +637,10 @@ static void snippetText(
iStart = tailOffset;
}
if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
trimWhiteSpace(&sb);
appendWhiteSpace(&sb);
append(&sb, zEllipsis);
appendWhiteSpace(&sb);
fts3SnippetTrimWhiteSpace(&sb);
fts3SnippetAppendWhiteSpace(&sb);
fts3SnippetAppend(&sb, zEllipsis, -1);
fts3SnippetAppendWhiteSpace(&sb);
}
iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
@ -695,11 +658,11 @@ static void snippetText(
}
if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
&& aMatch[iMatch].iCol==iCol ){
nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
fts3SnippetAppend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
iStart = aMatch[iMatch].iStart;
append(&sb, zStartMark);
nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
append(&sb, zEndMark);
fts3SnippetAppend(&sb, zStartMark, -1);
fts3SnippetAppend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
fts3SnippetAppend(&sb, zEndMark, -1);
iStart += aMatch[iMatch].nByte;
for(j=iMatch+1; j<nMatch; j++){
if( aMatch[j].iTerm==aMatch[iMatch].iTerm
@ -709,20 +672,20 @@ static void snippetText(
}
}
}else{
nappend(&sb, &zDoc[iStart], iEnd - iStart);
fts3SnippetAppend(&sb, &zDoc[iStart], iEnd - iStart);
iStart = iEnd;
}
}
tailCol = iCol;
tailOffset = iEnd;
}
trimWhiteSpace(&sb);
fts3SnippetTrimWhiteSpace(&sb);
if( tailEllipsis ){
appendWhiteSpace(&sb);
append(&sb, zEllipsis);
fts3SnippetAppendWhiteSpace(&sb);
fts3SnippetAppend(&sb, zEllipsis, -1);
}
pSnippet->zSnippet = stringBufferData(&sb);
pSnippet->nSnippet = stringBufferLength(&sb);
pSnippet->zSnippet = sb.z;
pSnippet->nSnippet = sb.z ? sb.nUsed : 0;
}
void sqlite3Fts3Offsets(
@ -731,8 +694,16 @@ void sqlite3Fts3Offsets(
){
Snippet *p; /* Snippet structure */
int rc = snippetAllOffsets(pCsr, &p);
if( rc==SQLITE_OK ){
snippetOffsetText(p);
if( p->zOffset ){
sqlite3_result_text(pCtx, p->zOffset, p->nOffset, SQLITE_TRANSIENT);
}else{
sqlite3_result_error_nomem(pCtx);
}
}else{
sqlite3_result_error_nomem(pCtx);
}
fts3SnippetFree(p);
}
@ -745,8 +716,16 @@ void sqlite3Fts3Snippet(
){
Snippet *p; /* Snippet structure */
int rc = snippetAllOffsets(pCsr, &p);
if( rc==SQLITE_OK ){
snippetText(pCsr, p, zStart, zEnd, zEllipsis);
if( p->zSnippet ){
sqlite3_result_text(pCtx, p->zSnippet, p->nSnippet, SQLITE_TRANSIENT);
}else{
sqlite3_result_error_nomem(pCtx);
}
}else{
sqlite3_result_error_nomem(pCtx);
}
fts3SnippetFree(p);
}

View File

@ -646,9 +646,11 @@ static int fts3DeleteTerms(Fts3Table *p, sqlite3_value **apVal){
}
}
}
rc = sqlite3_reset(pSelect);
}else{
sqlite3_reset(pSelect);
}
return sqlite3_reset(pSelect);
return rc;
}
/*

View File

@ -1,8 +1,8 @@
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
C Change\sFTS3\sto\sdetect\swhen\sthe\sRHS\sof\sthe\sMATCH\sopertor\sencounters\san\sOOM\nduring\sstring\sformat\sconversion\sand\sreport\sback\san\sSQLITE_NOMEM\serror.
D 2009-11-28T17:23:48
C Updates\sto\ssnippet()\sand\soffsets()\sfunctions\sof\sFTS3\sso\sthat\sthey\swork\nsanely\sfollowing\san\sOOM\sfault.
D 2009-11-28T21:33:21
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in c5827ead754ab32b9585487177c93bb00b9497b3
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@ -67,11 +67,11 @@ F ext/fts3/fts3_hash.c 29fba5a01e51c53e37040e53821e6b2cec18c8fb
F ext/fts3/fts3_hash.h 39524725425078bf9e814e9569c74a8e5a21b9fb
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
F ext/fts3/fts3_porter.c 3063da945fb0a935781c135f7575f39166173eca
F ext/fts3/fts3_snippet.c 082f2906deaaa2656f19b88834e89d099352af6e
F ext/fts3/fts3_snippet.c b62144ea85f413b1226e6d8182320606d96e65ac
F ext/fts3/fts3_tokenizer.c 36f78d1a43a29b0feaec1ced6da9e56b9c653d1f
F ext/fts3/fts3_tokenizer.h 7ff73caa3327589bf6550f60d93ebdd1f6a0fb5c
F ext/fts3/fts3_tokenizer1.c 0a5bcc579f35de5d24a9345d7908dc25ae403ee7
F ext/fts3/fts3_write.c f1bffadc003b243bb804732c336c9ea55b85de09
F ext/fts3/fts3_write.c 9b35ff9666b4867b406e63ca2277de6a81b53103
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/icu/README.txt 3b130aa66e7a681136f6add198b076a2f90d1e33
F ext/icu/icu.c 12e763d288d23b5a49de37caa30737b971a2f1e2
@ -778,14 +778,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P 80754d383a0e890ea3f315dab941b9f166481ddd
R c701b62bb7bcebeb3cce64991b77dc95
P 31eed4f8f95f0799d634eccbd9e09cb58172d250
R 63b6d1158b26a83c6b254a0d208f841c
U drh
Z 79bd1c9ce18f056385942b4e2103a96e
Z 8559238aab4fefe4430f422ac07d6956
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iD8DBQFLEVynoxKgR168RlERAmNeAKCFnXvJIGrlx8Xq9FMVpghKHSc6IwCfWSxQ
fDg8QqRtiYK559S5hJk0sTo=
=pBpa
iD8DBQFLEZckoxKgR168RlERAilJAJ9fOhH9A66qH4Y5YOtTa1Ji3y2FSACcC046
fNgY4pufsjXCsUyJbj/lIyo=
=K2mf
-----END PGP SIGNATURE-----

View File

@ -1 +1 @@
31eed4f8f95f0799d634eccbd9e09cb58172d250
b939a37a8ce296785a300e79ab9d3d87ad91343f