Refactor the sqlite3_normalized_sql() implementation. This is a

work-in-progress. There are still issues. FossilOrigin-Name: a4c890b0af9786295e6df05022009d8946550adb873535c610be805c2b7a4083
2025-11-16 23:02:26 +03:00 · 2018-12-10 16:00:57 +00:00
parent fb34559eb1
commit 643d855da9
7 changed files with 148 additions and 294 deletions
--- a/src/prepare.c
+++ b/src/prepare.c
@@ -709,204 +709,6 @@ static int sqlite3LockAndPrepare(
  return rc;
 }

-#ifdef SQLITE_ENABLE_NORMALIZE
-
-/*
-** Attempt to estimate the final output buffer size needed for the fully
-** normalized version of the specified SQL string.  This should take into
-** account any potential expansion that could occur (e.g. via IN clauses
-** being expanded, etc).  This size returned is the total number of bytes
-** including the NUL terminator.
-*/
-static int estimateNormalizedSize(
-  const char *zSql, /* The original SQL string */
-  int nSql          /* Length of original SQL string */
-){
-  int nOut = nSql + 4;
-  const char *z = zSql;
-  while( nOut<nSql*5 ){
-    while( z[0]!=0 && z[0]!='I' && z[0]!='i' ){ z++; }
-    if( z[0]==0 ) break;
-    z++;
-    if( z[0]!='N' && z[0]!='n' ) break;
-    z++;
-    while( sqlite3Isspace(z[0]) ){ z++; }
-    if( z[0]!='(' ) break;
-    z++;
-    nOut += 5; /* ?,?,? */
-  }
-  return nOut;
-}
-
-/*
-** Copy the current token into the output buffer while dealing with quoted
-** identifiers.  By default, all letters will be converted into lowercase.
-** If the bUpper flag is set, uppercase will be used.  The piOut argument
-** will be used to update the target index into the output string.
-*/
-static void copyNormalizedToken(
-  const char *zSql, /* The original SQL string */
-  int iIn,          /* Current index into the original SQL string */
-  int nToken,       /* Number of bytes in the current token */
-  int tokenFlags,   /* Flags returned by the tokenizer */
-  char *zOut,       /* The output string */
-  int *piOut        /* Pointer to target index into the output string */
-){
-  int bQuoted = tokenFlags & SQLITE_TOKEN_QUOTED;
-  int bKeyword = tokenFlags & SQLITE_TOKEN_KEYWORD;
-  int j = *piOut, k = 0;
-  for(; k<nToken; k++){
-    if( bQuoted ){
-      if( k==0 && iIn>0 ){
-        zOut[j++] = '"';
-        continue;
-      }else if( k==nToken-1 ){
-        zOut[j++] = '"';
-        continue;
-      }
-    }
-    if( bKeyword ){
-      zOut[j++] = sqlite3Toupper(zSql[iIn+k]);
-    }else{
-      zOut[j++] = sqlite3Tolower(zSql[iIn+k]);
-    }
-  }
-  *piOut = j;
-}
-
-/*
-** Compute a normalization of the SQL given by zSql[0..nSql-1].  Return
-** the normalization in space obtained from sqlite3DbMalloc().  Or return
-** NULL if anything goes wrong or if zSql is NULL.
-*/
-char *sqlite3Normalize(
-  Vdbe *pVdbe,      /* VM being reprepared */
-  const char *zSql, /* The original SQL string */
-  int nSql          /* Size of the input string in bytes */
-){
-  sqlite3 *db;           /* Database handle. */
-  char *z;               /* The output string */
-  int nZ;                /* Size of the output string in bytes */
-  int i;                 /* Next character to read from zSql[] */
-  int j;                 /* Next character to fill in on z[] */
-  int tokenType = 0;     /* Type of the next token */
-  int prevTokenType = 0; /* Type of the previous token, except spaces */
-  int n;                 /* Size of the next token */
-  int nParen = 0;        /* Nesting level of parenthesis */
-  int iStartIN = 0;      /* Start of RHS of IN operator in z[] */
-  int nParenAtIN = 0;    /* Value of nParent at start of RHS of IN operator */
-
-  db = sqlite3VdbeDb(pVdbe);
-  assert( db!=0 );
-  if( zSql==0 ) return 0;
-  nZ = estimateNormalizedSize(zSql, nSql);
-  z = sqlite3DbMallocRawNN(db, nZ);
-  if( z==0 ) goto normalizeError;
-  for(i=j=0; i<nSql && zSql[i]; i+=n){
-    int flags = 0;
-    if( tokenType!=TK_SPACE ) prevTokenType = tokenType;
-    n = sqlite3GetTokenNormalized((unsigned char*)zSql+i, &tokenType, &flags);
-    switch( tokenType ){
-      case TK_SPACE: {
-        break;
-      }
-      case TK_ILLEGAL: {
-        goto normalizeError;
-      }
-      case TK_STRING:
-      case TK_INTEGER:
-      case TK_FLOAT:
-      case TK_VARIABLE:
-      case TK_BLOB: {
-        z[j++] = '?';
-        break;
-      }
-      case TK_LP:
-      case TK_RP: {
-        if( tokenType==TK_LP ){
-          nParen++;
-          if( prevTokenType==TK_IN ){
-            iStartIN = j;
-            nParenAtIN = nParen;
-          }
-        }else{
-          if( iStartIN>0 && nParen==nParenAtIN ){
-            assert( iStartIN+6<nZ );
-            memcpy(z+iStartIN+1, "?,?,?", 5);
-            j = iStartIN+6;
-            assert( nZ-1-j>=0 );
-            assert( nZ-1-j<nZ );
-            memset(z+j, 0, nZ-1-j);
-            iStartIN = 0;
-          }
-          nParen--;
-        }
-        assert( nParen>=0 );
-        /* Fall through */
-      }
-      case TK_MINUS:
-      case TK_SEMI:
-      case TK_PLUS:
-      case TK_STAR:
-      case TK_SLASH:
-      case TK_REM:
-      case TK_EQ:
-      case TK_LE:
-      case TK_NE:
-      case TK_LSHIFT:
-      case TK_LT:
-      case TK_RSHIFT:
-      case TK_GT:
-      case TK_GE:
-      case TK_BITOR:
-      case TK_CONCAT:
-      case TK_COMMA:
-      case TK_BITAND:
-      case TK_BITNOT:
-      case TK_DOT:
-      case TK_IN:
-      case TK_IS:
-      case TK_NOT:
-      case TK_NULL:
-      case TK_ID: {
-        if( tokenType==TK_NULL ){
-          if( prevTokenType==TK_IS || prevTokenType==TK_NOT ){
-            /* NULL is a keyword in this case, not a literal value */
-          }else{
-            /* Here the NULL is a literal value */
-            z[j++] = '?';
-            break;
-          }
-        }
-        if( j>0 && sqlite3IsIdChar(z[j-1]) && sqlite3IsIdChar(zSql[i]) ){
-          z[j++] = ' ';
-        }
-        if( tokenType==TK_ID ){
-          if( zSql[i]=='"'
-           && sqlite3VdbeUsesDoubleQuotedString(db,pVdbe,zSql+i,n)
-          ){
-            z[j++] = '?';
-            break;
-          }
-          if( nParen==nParenAtIN ) iStartIN = 0;
-        }
-        copyNormalizedToken(zSql, i, n, flags, z, &j);
-        break;
-      }
-    }
-  }
-  assert( j<nZ && "one" );
-  while( j>0 && z[j-1]==' ' ){ j--; }
-  if( j>0 && z[j-1]!=';' ){ z[j++] = ';'; }
-  z[j] = 0;
-  assert( j<nZ && "two" );
-  return z;
-
-normalizeError:
-  sqlite3DbFree(db, z);
-  return 0;
-}
-#endif /* SQLITE_ENABLE_NORMALIZE */

 /*
 ** Rerun the compilation of a statement after a schema change.