diff --git a/manifest b/manifest index 3614ce2e78..35b0645214 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Update\scomments\sand\sremove\sdead\scode\sfrom\sbtree.c\s(CVS\s1432) -D 2004-05-22T02:55:23 +C Steps\stowards\sUTF-16\sdatabases.\sSome\stests\sare\sfailing\sbecause\sof\sthis\ncommit.\s(CVS\s1433) +D 2004-05-22T03:05:34 F Makefile.in ab7b0d5118e2da97bac66be8684a1034e3500f5a F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906 F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd @@ -37,7 +37,7 @@ F src/func.c cfbb7096efb58e2857e3b312a8958a12774b625a F src/hash.c 440c2f8cb373ee1b4e13a0988489c7cd95d55b6f F src/hash.h 762d95f1e567664d1eafc1687de755626be962fb F src/insert.c e510d62d23b4de4d901e7ccbbe7833b7fb3b9570 -F src/main.c 5604d5a9a6b31720b95e6a2cb4c804c53592f145 +F src/main.c a2be4b3976818f3fe5dfdc5709c330599da7acc3 F src/md5.c 8e39fdae6d8776b87558e91dcc94740c9b635a9c F src/os.c ddcda92f7fd71b4513c57c1ec797917f206d504e F src/os.h 6e446a17cbeb6c2ce470683a0bb8d9c63abe8607 @@ -50,24 +50,24 @@ F src/random.c eff68e3f257e05e81eae6c4d50a51eb88beb4ff3 F src/select.c 7d77a8bed7eeac23216d42fc1be006fb4352fcdc F src/shell.c 0c4662e13bfbfd3d13b066c5859cc97ad2f95d21 F src/sqlite.h.in 75b6eb9eeff3e84052444584b5ad4f0d9a81b8ac -F src/sqliteInt.h a7b3f10c5e7231abee9ef12ee2d986554ad073df +F src/sqliteInt.h 4b45892cb082f4883efb58c5e13328c42cbc7642 F src/table.c af14284fa36c8d41f6829e3f2819dce07d3e2de2 F src/tclsqlite.c fbf0fac73624ae246551a6c671f1de0235b5faa1 F src/test1.c e5ba63a9a36fe34f48e3363887984c4d71dbf066 F src/test2.c 6195a1ca2c8d0d2d93644e86da3289b403486872 F src/test3.c 5e4a6d596f982f6f47a5f9f75ede9b4a3b739968 F src/test4.c b3fab9aea7a8940a8a7386ce1c7e2157b09bd296 -F src/test5.c c92dca7028b19b9c8319d55e0a5037fc183640a6 +F src/test5.c 9a1f15133f6955f067c5246e564723b5f23ff221 F src/tokenize.c e7536dd31205d5afb76c1bdc832dea009c7a3847 F src/trigger.c 11afe9abfba13a2ba142944c797c952e162d117f F src/update.c 1a5e9182596f3ea8c7a141e308a3d2a7e5689fee -F src/utf.c c27c4f1120f7aaef00cd6942b3d9e3f4ca4fe0e4 +F src/utf.c 537e1c98cddc623628d44497ec02c2246cf66dea F src/util.c 5cbeb452da09cfc7248de9948c15b14d840723f7 F src/vacuum.c c134702e023db8778e6be59ac0ea7b02315b5476 -F src/vdbe.c 2944326a99869c71698f634d6ace9e9be56d9180 +F src/vdbe.c 91e6663c690f5208fadca0bd06b4878aed61f239 F src/vdbe.h 391d5642a83af686f35c228fcd36cb4456d68f44 -F src/vdbeInt.h 8ed2272e97bef20c5302c3b2cb4f900e8b5e2642 -F src/vdbeaux.c 2dd437063e9a0769ce453f7ce94407934f56e2f8 +F src/vdbeInt.h f40e8048d644c8389cda16f46479376f763d56e6 +F src/vdbeaux.c 8e993bfd0f943163548ce3e09797ce5503d2366f F src/where.c efe5d25fe18cd7381722457898cd863e84097a0c F test/all.test 569a92a8ee88f5300c057cc4a8f50fbbc69a3242 F test/attach.test cb9b884344e6cfa5e165965d5b1adea679a24c83 @@ -195,7 +195,7 @@ F www/sqlite.tcl 3c83b08cf9f18aa2d69453ff441a36c40e431604 F www/tclsqlite.tcl b9271d44dcf147a93c98f8ecf28c927307abd6da F www/vdbe.tcl 9b9095d4495f37697fd1935d10e14c6015e80aa1 F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4 -P acb65297b69c531813287166175fa7864c900fe6 -R b096a079434ae3eb2f44e598006a9ba9 -U drh -Z 0fc6dbec68937b17866f5fe0a96973f8 +P 8069caca82bc4d40d8ac95bafdd91a18a70ab1e0 +R f44ee6ced05cfe974110947781c41eee +U danielk1977 +Z 35da3b16c198243de8a20489bd428c37 diff --git a/manifest.uuid b/manifest.uuid index f7b7852804..c813f79140 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -8069caca82bc4d40d8ac95bafdd91a18a70ab1e0 \ No newline at end of file +c4a8246864eee7cb993ab7b703324d92c284d72a \ No newline at end of file diff --git a/src/main.c b/src/main.c index 8b50ac16f4..bbbb2135fe 100644 --- a/src/main.c +++ b/src/main.c @@ -14,7 +14,7 @@ ** other files are for internal use by SQLite and should not be ** accessed by users of the library. ** -** $Id: main.c,v 1.181 2004/05/21 11:39:05 danielk1977 Exp $ +** $Id: main.c,v 1.182 2004/05/22 03:05:34 danielk1977 Exp $ */ #include "sqliteInt.h" #include "os.h" @@ -1151,7 +1151,7 @@ int sqlite3_prepare16( char const *zTail8 = 0; int rc; - zSql8 = sqlite3utf16to8(zSql, nBytes); + zSql8 = sqlite3utf16to8(zSql, nBytes, SQLITE3_BIGENDIAN); if( !zSql8 ){ sqlite3Error(db, SQLITE_NOMEM, 0); return SQLITE_NOMEM; @@ -1197,6 +1197,7 @@ static int openDatabase( db->magic = SQLITE_MAGIC_BUSY; db->nDb = 2; db->aDb = db->aDbStatic; + db->enc = def_enc; /* db->flags |= SQLITE_ShortColNames; */ sqlite3HashInit(&db->aFunc, SQLITE_HASH_STRING, 1); sqlite3HashInit(&db->aCollSeq, SQLITE_HASH_STRING, 0); @@ -1252,6 +1253,7 @@ int sqlite3_open_new( const char **options ){ return openDatabase(zFilename, ppDb, options, TEXT_Utf8); + /* return openDatabase(zFilename, ppDb, options, TEXT_Utf16le); */ } sqlite *sqlite3_open(const char *zFilename, int mode, char **pzErrMsg){ @@ -1280,7 +1282,7 @@ int sqlite3_open16( assert( ppDb ); - zFilename8 = sqlite3utf16to8(zFilename, -1); + zFilename8 = sqlite3utf16to8(zFilename, -1, SQLITE3_BIGENDIAN); if( !zFilename8 ){ *ppDb = 0; return SQLITE_NOMEM; @@ -1337,7 +1339,7 @@ int sqlite3_open16(const void *filename, sqlite3 **pDb, const char **options){ int rc; char * filename8; - filename8 = sqlite3utf16to8(filename, -1); + filename8 = sqlite3utf16to8(filename, -1, SQLITE3_BIGENDIAN); if( !filename8 ){ return SQLITE_NOMEM; } diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 5d8cee4c3b..5e3153bd2a 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -11,7 +11,7 @@ ************************************************************************* ** Internal interface definitions for SQLite. ** -** @(#) $Id: sqliteInt.h,v 1.244 2004/05/21 10:08:54 danielk1977 Exp $ +** @(#) $Id: sqliteInt.h,v 1.245 2004/05/22 03:05:34 danielk1977 Exp $ */ #include "config.h" #include "sqlite.h" @@ -282,7 +282,6 @@ struct Db { u16 flags; /* Flags associated with this database */ void *pAux; /* Auxiliary data. Usually NULL */ void (*xFreeAux)(void*); /* Routine to free pAux */ - u8 textEnc; /* Text encoding for this database. */ }; /* @@ -415,6 +414,7 @@ struct sqlite { int errCode; /* Most recent error code (SQLITE_*) */ char *zErrMsg; /* Most recent error message (UTF-8 encoded) */ void *zErrMsg16; /* Most recent error message (UTF-16 encoded) */ + u8 enc; /* Text encoding for this database. */ }; /* @@ -652,6 +652,7 @@ struct FKey { ** otherwise be equal, then return a result as if the second key larger. */ struct KeyInfo { + u8 enc; /* Text encoding - one of the TEXT_Utf* values */ u8 incrKey; /* Increase 2nd key by epsilon before comparison */ int nField; /* Number of entries in aColl[] */ u8 *aSortOrder; /* If defined an aSortOrder[i] is true, sort DESC */ @@ -1341,7 +1342,7 @@ char *sqlite3_snprintf(int,char*,const char*,...); int sqlite3GetInt32(const char *, int*); int sqlite3GetInt64(const char *, i64*); int sqlite3FitsIn64Bits(const char *); -unsigned char *sqlite3utf16to8(const void *pData, int N); +unsigned char *sqlite3utf16to8(const void *pData, int N, int big_endian); void *sqlite3utf8to16be(const unsigned char *pIn, int N); void *sqlite3utf8to16le(const unsigned char *pIn, int N); void sqlite3utf16to16le(void *pData, int N); @@ -1361,4 +1362,4 @@ int sqlite3IndexAffinityOk(Expr *pExpr, char idx_affinity); char sqlite3ExprAffinity(Expr *pExpr); int sqlite3atoi64(const char*, i64*); void sqlite3Error(sqlite *, int, const char*,...); - +int sqlite3utfTranslate(const void *, int , u8 , void **, int *, u8); diff --git a/src/test5.c b/src/test5.c index 816db4cc0b..f42840169c 100644 --- a/src/test5.c +++ b/src/test5.c @@ -15,9 +15,10 @@ ** is used for testing the SQLite routines for converting between ** the various supported unicode encodings. ** -** $Id: test5.c,v 1.4 2004/05/19 10:34:53 danielk1977 Exp $ +** $Id: test5.c,v 1.5 2004/05/22 03:05:34 danielk1977 Exp $ */ #include "sqliteInt.h" +#include "os.h" /* to get SQLITE3_BIGENDIAN */ #include "tcl.h" #include #include @@ -166,7 +167,7 @@ static int sqlite_utf16to8( } in = Tcl_GetByteArrayFromObj(objv[1], 0); - out = sqlite3utf16to8(in, -1); + out = sqlite3utf16to8(in, -1, SQLITE3_BIGENDIAN); res = Tcl_NewByteArrayObj(out, strlen(out)+1); sqliteFree(out); diff --git a/src/utf.c b/src/utf.c index b2891ac5e2..5e77967a61 100644 --- a/src/utf.c +++ b/src/utf.c @@ -12,7 +12,7 @@ ** This file contains routines used to translate between UTF-8, ** UTF-16, UTF-16BE, and UTF-16LE. ** -** $Id: utf.c,v 1.6 2004/05/20 11:00:52 danielk1977 Exp $ +** $Id: utf.c,v 1.7 2004/05/22 03:05:34 danielk1977 Exp $ ** ** Notes on UTF-8: ** @@ -53,6 +53,7 @@ #include #include #include "sqliteInt.h" +#include "os.h" typedef struct UtfString UtfString; struct UtfString { @@ -92,13 +93,13 @@ struct UtfString { /* ** Read the BOM from the start of *pStr, if one is present. Return zero ** for little-endian, non-zero for big-endian. If no BOM is present, return -** the machines native byte order. +** the value of the parameter "big_endian". ** ** Return values: ** 1 -> big-endian string ** 0 -> little-endian string */ -static int readUtf16Bom(UtfString *pStr){ +static int readUtf16Bom(UtfString *pStr, int big_endian){ /* The BOM must be the first thing read from the string */ assert( pStr->c==0 ); @@ -121,7 +122,7 @@ static int readUtf16Bom(UtfString *pStr){ } } - return SQLITE3_NATIVE_BIGENDIAN; + return big_endian; } @@ -375,8 +376,10 @@ int sqlite3utf16ByteLen(const void *pZ, int nChar){ str.c = 0; str.n = -1; - /* Check for a BOM */ - big_endian = readUtf16Bom(&str); + /* Check for a BOM. We just ignore it if there is one, it's only read + ** so that it is not counted as a character. + */ + big_endian = readUtf16Bom(&str, 0); ret = 0-str.c; while( code!=0 && nReadflags & MEM_Str)==0){hardStringify(P);} \ else if(((P)->flags & MEM_Term)==0){hardNulTermify(P);} static int hardNulTermify(Mem *pStack){ @@ -179,88 +205,155 @@ static void hardRealify(Mem *pStack){ pStack->flags |= MEM_Real; } +/* +** Parmameter "flags" is the value of the flags for a string Mem object. +** Return one of TEXT_Utf8, TEXT_Utf16le or TEXT_Utf16be, depending +** on the encoding indicated by the flags value. +*/ +static u8 flagsToEnc(int flags){ + if( flags&MEM_Utf8 ){ + assert( !(flags&(MEM_Utf16be|MEM_Utf16le)) ); + return TEXT_Utf8; + } + if( flags&MEM_Utf16le ){ + assert( !(flags&(MEM_Utf8|MEM_Utf16be)) ); + return TEXT_Utf16le; + } + assert( flags&MEM_Utf16be ); + assert( !(flags&(MEM_Utf8|MEM_Utf16le)) ); + return TEXT_Utf16be; +} + +/* +** Parameter "enc" is one of TEXT_Utf8, TEXT_Utf16le or TEXT_Utf16be. +** Return the corresponding MEM_Utf* value. +*/ +static int encToFlags(u8 enc){ + switch( enc ){ + case TEXT_Utf8: return MEM_Utf8; + case TEXT_Utf16be: return MEM_Utf16be; + case TEXT_Utf16le: return MEM_Utf16le; + } + assert(0); +} + /* ** If pMem is a string object, this routine sets the encoding of the string ** (to one of UTF-8 or UTF16) and whether or not the string is ** nul-terminated. If pMem is not a string object, then this routine is ** a no-op. ** -** If argument "utf16" is true, then this routine will attempt to convert -** the string to native byte order UTF-16 encoding. Otherwise, the -** conversion is to UTF-8 encoding. If the "term" argument is true, then a -** nul terminator is added to the string if it does not already have one. -** -** +** The second argument, "flags" consists of one of MEM_Utf8, MEM_Utf16le +** or MEM_Utf16be, possible ORed with MEM_Term. If necessary this function +** manipulates the value stored by pMem so that it matches the flags passed +** in "flags". ** ** SQLITE_OK is returned if the conversion is successful (or not required). ** SQLITE_NOMEM may be returned if a malloc() fails during conversion ** between formats. */ -static int SetEncoding(Mem *pMem, int flags){ - int f; - if( !(pMem->flags&MEM_Str) ){ +int SetEncoding(Mem *pMem, int flags){ + u8 enc1; /* Current string encoding (TEXT_Utf* value) */ + u8 enc2; /* Required string encoding (TEXT_Utf* value) */ + + /* If this is not a string, do nothing. */ + if( !(pMem->flags&MEM_Str) || pMem->flags&MEM_Int || pMem->flags&MEM_Real ){ return SQLITE_OK; } - f = (pMem->flags)&(MEM_Utf8|MEM_Utf16le|MEM_Utf16be|MEM_Term); - assert( flags==(flags&(MEM_Utf8|MEM_Utf16le|MEM_Utf16be|MEM_Term))); - if( f==flags ){ - return SQLITE_OK; - } + enc1 = flagsToEnc(pMem->flags); + enc2 = flagsToEnc(flags); - if( (SQLITE3_BIGENDIAN && (f&MEM_Utf16le)) || - (SQLITE3_LITTLEENDIAN && (f&MEM_Utf16be)) ){ - int i; - for(i=0; in; i+=2){ - char c = pMem->z[i]; - pMem->z[i] = pMem->z[i+1]; - pMem->z[i+1] = c; + if( enc1!=enc2 ){ + /* If the current encoding does not match the desired encoding, then + ** we will need to do some translation between encodings. + */ + char *z; + int n; + int rc = sqlite3utfTranslate(pMem->z, pMem->n, enc1, (void **)&z, &n, enc2); + if( rc!=SQLITE_OK ){ + return rc; } - } - if( (flags&MEM_Utf8) && (f&(MEM_Utf16le|MEM_Utf16be)) ){ - char *z = sqlite3utf16to8(pMem->z, pMem->n); - if( !z ){ - return SQLITE_NOMEM; - } - Release(pMem); + /* Result of sqlite3utfTranslate is currently always dynamically + ** allocated and nul terminated. This might be altered as a performance + ** enhancement later. + */ pMem->z = z; - pMem->n = strlen(z)+1; - pMem->flags = (MEM_Utf8|MEM_Dyn|MEM_Str|MEM_Term); - return SQLITE_OK; + pMem->n = n; + pMem->flags = (MEM_Str | MEM_Dyn | MEM_Term | flags); } - if( (flags&MEM_Utf16le) && (f&MEM_Utf8) ){ - char *z = sqlite3utf8to16le(pMem->z, pMem->n); - if( !z ){ - return SQLITE_NOMEM; + if( (flags&MEM_Term) && !(pMem->flags&MEM_Term) ){ + /* If we did not do any translation, but currently the string is + ** not nul terminated (and is required to be), then we add the + ** nul terminator now. We never have to do this if we translated + ** the encoding of the string, as the translation functions return + ** nul terminated values. + */ + int f = pMem->flags; + int nulTermLen = 2; /* The number of 0x00 bytes to append */ + if( enc2==MEM_Utf8 ){ + nulTermLen = 1; } - Release(pMem); - pMem->z = z; - pMem->n = sqlite3utf16ByteLen(z, -1) + 2; - pMem->flags = (MEM_Utf16le|MEM_Dyn|MEM_Str|MEM_Term); - return SQLITE_OK; - } - if( (flags&MEM_Utf16be) && (f&MEM_Utf8) ){ - char *z = sqlite3utf8to16be(pMem->z, pMem->n); - if( !z ){ - return SQLITE_NOMEM; + if( pMem->n+nulTermLen<=NBFS ){ + /* If the string plus the nul terminator will fit in the Mem.zShort + ** buffer, and it is not already stored there, copy it there. + */ + if( !(f&MEM_Short) ){ + memcpy(pMem->z, pMem->zShort, pMem->n); + if( f&MEM_Dyn ){ + sqliteFree(pMem->z); + } + pMem->z = pMem->zShort; + pMem->flags &= ~(MEM_Static|MEM_Ephem|MEM_Dyn); + pMem->flags |= MEM_Short; + } + }else{ + /* Otherwise we have to malloc for memory. If the string is already + ** dynamic, use sqliteRealloc(). Otherwise sqliteMalloc() enough + ** space for the string and the nul terminator, and copy the string + ** data there. + */ + if( f&MEM_Dyn ){ + pMem->z = (char *)sqliteRealloc(pMem->z, pMem->n+nulTermLen); + if( !pMem->z ){ + return SQLITE_NOMEM; + } + }else{ + char *z = (char *)sqliteMalloc(pMem->n+nulTermLen); + memcpy(z, pMem->z, pMem->n); + pMem->z = z; + pMem->flags &= ~(MEM_Static|MEM_Ephem|MEM_Short); + pMem->flags |= MEM_Dyn; + } } - Release(pMem); - pMem->z = z; - pMem->n = sqlite3utf16ByteLen(z, -1) + 2; - pMem->flags = (MEM_Utf16be|MEM_Dyn|MEM_Str|MEM_Term); - return SQLITE_OK; - } - if( (flags&MEM_Term) && !(f&&MEM_Term) ){ - NulTermify(pMem); + /* pMem->z now points at the string data, with enough space at the end + ** to insert the nul nul terminator. pMem->n has not yet been updated. + */ + memcpy(&pMem->z[pMem->n], "\0\0", nulTermLen); + pMem->n += nulTermLen; + pMem->flags |= MEM_Term; } - return SQLITE_OK; } +int sqlite3VdbeSetEncoding(Mem *pMem, u8 enc){ + switch( enc ){ + case TEXT_Utf8: + return SetEncoding(pMem, MEM_Utf8); + case TEXT_Utf16le: + return SetEncoding(pMem, MEM_Utf16le); + case TEXT_Utf16be: + return SetEncoding(pMem, MEM_Utf16be); + default: + assert(0); + } + return SQLITE_INTERNAL; +} + /* ** Convert the given stack entity into a string that has been obtained ** from sqliteMalloc(). This is different from Stringify() above in that @@ -840,11 +933,11 @@ static void applyAffinity(Mem *pRec, char affinity){ } } +#ifndef NDEBUG /* ** Write a nice string representation of the contents of cell pMem ** into buffer zBuf, length nBuf. */ -#ifndef NDEBUG void prettyPrintMem(Mem *pMem, char *zBuf, int nBuf){ char *zCsr = zBuf; int f = pMem->flags; @@ -865,7 +958,8 @@ void prettyPrintMem(Mem *pMem, char *zBuf, int nBuf){ c = 's'; } - zCsr += sprintf(zCsr, "%c[", c); + zCsr += sprintf(zCsr, "%c", c); + zCsr += sprintf(zCsr, "%d[", pMem->n); for(i=0; i<16 && in; i++){ zCsr += sprintf(zCsr, "%02X ", ((int)pMem->z[i] & 0xFF)); } @@ -876,10 +970,47 @@ void prettyPrintMem(Mem *pMem, char *zBuf, int nBuf){ } zCsr += sprintf(zCsr, "]"); + *zCsr = '\0'; + }else if( f & MEM_Str ){ + int j, k; + zBuf[0] = ' '; + if( f & MEM_Dyn ){ + zBuf[1] = 'z'; + assert( (f & (MEM_Static|MEM_Ephem))==0 ); + }else if( f & MEM_Static ){ + zBuf[1] = 't'; + assert( (f & (MEM_Dyn|MEM_Ephem))==0 ); + }else if( f & MEM_Ephem ){ + zBuf[1] = 'e'; + assert( (f & (MEM_Static|MEM_Dyn))==0 ); + }else{ + zBuf[1] = 's'; + } + k = 2; + k += sprintf(&zBuf[k], "%d", pMem->n); + zBuf[k++] = '['; + for(j=0; j<15 && jn; j++){ + u8 c = pMem->z[j]; + if( c==0 && j==pMem->n-1 ) break; +/* + zBuf[k++] = "0123456789ABCDEF"[c>>4]; + zBuf[k++] = "0123456789ABCDEF"[c&0xf]; +*/ + if( c>=0x20 && c<0x7f ){ + zBuf[k++] = c; + }else{ + zBuf[k++] = '.'; + } + } + zBuf[k++] = ']'; + zBuf[k++] = 0; } - - *zCsr = '\0'; } + +/* Temporary - this is useful in conjunction with prettyPrintMem whilst +** debugging. +*/ +char zGdbBuf[100]; #endif /* @@ -1264,37 +1395,13 @@ case OP_Variable: { Mem *pVar; assert( j>=0 && jnVar ); - /* If we need to translate between text encodings, do it now. If this is - ** required, then put the new string in p->apVar. This way, if the - ** variable is used again, even after the virtual machine is reset, the - ** conversion won't have to be done again. - ** - ** FIX ME: This is where we need to support databases that use other than - ** UTF-8 on disk. + /* Ensure the variable string (if it is a string) is UTF-8 encoded and + ** nul terminated. Do the transformation on the variable before it + ** is copied onto the stack, in case it is used again before this VDBE is + ** finalized. */ pVar = &p->apVar[j]; - if( pVar->flags&MEM_Str && !(pVar->flags&MEM_Utf8) ){ - char *zUtf8; - assert( pVar->flags&(MEM_Utf16le|MEM_Utf16be) ); - zUtf8 = sqlite3utf16to8(pVar->z, pVar->n); - if( !zUtf8 ){ - goto no_mem; - } - Release(pVar); - pVar->z = zUtf8; - pVar->n = strlen(zUtf8)+1; - pVar->flags = MEM_Str|MEM_Dyn|MEM_Utf8|MEM_Term; - } - - /* Ensure that the variable value is nul terminated. Again, do this in - ** place. - ** - ** FIX ME: The rest of the vdbe will soon understand MEM_Term, making - ** this step unnecessary. - */ - if( pVar->flags&MEM_Str ){ - NulTermify(pVar); - } + SetEncoding(pVar, MEM_Utf8|MEM_Term); /* Copy the value in pVar to the top of the stack. If pVar is a string or ** a blob just store a pointer to the same memory, do not make a copy. @@ -1531,7 +1638,7 @@ case OP_Concat: { } pTos++; pTos->n = nByte; - pTos->flags = MEM_Str|MEM_Dyn|MEM_Utf8; + pTos->flags = MEM_Str|MEM_Dyn|MEM_Utf8|MEM_Term; pTos->z = zNew; break; } @@ -1693,6 +1800,9 @@ case OP_Function: { popStack(&pTos, n); pTos++; *pTos = ctx.s; + if( pTos->flags & MEM_Str ){ + pTos->flags |= MEM_Term; + } if( pTos->flags & MEM_Short ){ pTos->z = pTos->zShort; } @@ -2311,7 +2421,11 @@ case OP_Column: { } off += off2; - sqlite3VdbeSerialGet(&zRec[off], colType, pTos); + sqlite3VdbeSerialGet(&zRec[off], colType, pTos, p->db->enc); + rc = SetEncoding(pTos, MEM_Utf8|MEM_Term); + if( rc!=SQLITE_OK ){ + goto abort_due_to_error; + } break; } @@ -2422,7 +2536,11 @@ case OP_Column: { getBtreeMem(pCrsr, offset, len, pC->keyAsData, &sMem); zData = sMem.z; } - sqlite3VdbeSerialGet(zData, pC->aType[p2], pTos); + sqlite3VdbeSerialGet(zData, pC->aType[p2], pTos, p->db->enc); + rc = SetEncoding(pTos, MEM_Utf8|MEM_Term); + if( rc!=SQLITE_OK ){ + goto abort_due_to_error; + } Release(&sMem); break; @@ -2491,6 +2609,7 @@ case OP_MakeRecord: { if( zAffinity ){ applyAffinity(pRec, zAffinity[pRec-pData0]); } + SetEncoding(pRec, encToFlags(p->db->enc)); serial_type = sqlite3VdbeSerialType(pRec); nBytes += sqlite3VdbeSerialTypeLen(serial_type); nBytes += sqlite3VarintLen(serial_type); @@ -2614,6 +2733,7 @@ case OP_MakeIdxKey: { if( pRec->flags&MEM_Null ){ containsNull = 1; } + SetEncoding(pRec, encToFlags(p->db->enc)); serial_type = sqlite3VdbeSerialType(pRec); nByte += sqlite3VarintLen(serial_type); nByte += sqlite3VdbeSerialTypeLen(serial_type); @@ -2645,7 +2765,8 @@ case OP_MakeIdxKey: { /* Build the key in the buffer pointed to by zKey. */ for(pRec=pData0; pRec<=pTos; pRec++){ - offset += sqlite3PutVarint(&zKey[offset], sqlite3VdbeSerialType(pRec)); + u64 serial_type = sqlite3VdbeSerialType(pRec); + offset += sqlite3PutVarint(&zKey[offset], serial_type); offset += sqlite3VdbeSerialPut(&zKey[offset], pRec); } if( addRowid ){ @@ -2968,6 +3089,7 @@ case OP_OpenWrite: { pCur->pKeyInfo = (KeyInfo*)pOp->p3; if( pCur->pKeyInfo ){ pCur->pIncrKey = &pCur->pKeyInfo->incrKey; + pCur->pKeyInfo->enc = p->db->enc; }else{ pCur->pIncrKey = &pCur->bogusIncrKey; } @@ -3051,6 +3173,7 @@ case OP_OpenTemp: { rc = sqlite3BtreeCursor(pCx->pBt, pgno, 1, sqlite3VdbeKeyCompare, pOp->p3, &pCx->pCursor); pCx->pKeyInfo = (KeyInfo*)pOp->p3; + pCx->pKeyInfo->enc = p->db->enc; pCx->pIncrKey = &pCx->pKeyInfo->incrKey; } }else{ @@ -3824,7 +3947,8 @@ case OP_IdxColumn: { } pTos++; - sqlite3VdbeSerialGet(&zData[len], serial_type, pTos); + sqlite3VdbeSerialGet(&zData[len], serial_type, pTos, p->db->enc); + SetEncoding(pTos, MEM_Utf8|MEM_Term); if( freeZData ){ sqliteFree(zData); } @@ -4585,6 +4709,7 @@ case OP_SortPut: { case OP_Sort: { int i; KeyInfo *pKeyInfo = (KeyInfo*)pOp->p3; + pKeyInfo->enc = p->db->enc; Sorter *pElem; Sorter *apSorter[NSORT]; for(i=0; itrace, " i:%lld", pTos[i].i); }else if( pTos[i].flags & MEM_Real ){ fprintf(p->trace, " r:%g", pTos[i].r); - }else if( pTos[i].flags & MEM_Str ){ - int j, k; - char zBuf[100]; - zBuf[0] = ' '; - if( pTos[i].flags & MEM_Dyn ){ - zBuf[1] = 'z'; - assert( (pTos[i].flags & (MEM_Static|MEM_Ephem))==0 ); - }else if( pTos[i].flags & MEM_Static ){ - zBuf[1] = 't'; - assert( (pTos[i].flags & (MEM_Dyn|MEM_Ephem))==0 ); - }else if( pTos[i].flags & MEM_Ephem ){ - zBuf[1] = 'e'; - assert( (pTos[i].flags & (MEM_Static|MEM_Dyn))==0 ); - }else{ - zBuf[1] = 's'; - } - zBuf[2] = '['; - k = 3; - for(j=0; j<15 && j>4]; - zBuf[k++] = "0123456789ABCDEF"[c&0xf]; - if( c>=0x20 && c<0x7f ){ - zBuf[k++] = c; - }else{ - zBuf[k++] = '.'; - } - } - zBuf[k++] = ']'; - zBuf[k++] = 0; - fprintf(p->trace, "%s", zBuf); }else{ char zBuf[100]; prettyPrintMem(pTos, zBuf, 100); diff --git a/src/vdbeInt.h b/src/vdbeInt.h index 96a0f8720d..9340595232 100644 --- a/src/vdbeInt.h +++ b/src/vdbeInt.h @@ -147,23 +147,24 @@ typedef struct Mem Mem; #define MEM_Int 0x0004 /* Value is an integer */ #define MEM_Real 0x0008 /* Value is a real number */ #define MEM_Blob 0x0010 /* Value is a BLOB */ +#define MEM_Struct 0x0020 /* Value is some kind of struct */ -#define MEM_Term 0x1000 /* String has a nul terminator character */ +#define MEM_Utf8 0x0040 /* String uses UTF-8 encoding */ +#define MEM_Utf16be 0x0080 /* String uses UTF-16 big-endian */ +#define MEM_Utf16le 0x0100 /* String uses UTF-16 little-endian */ +#define MEM_Term 0x0200 /* String has a nul terminator character */ -#define MEM_Utf8 0x0020 /* String uses UTF-8 encoding */ -#define MEM_Utf16be 0x0040 /* String uses UTF-16 big-endian */ -#define MEM_Utf16le 0x0080 /* String uses UTF-16 little-endian */ +#define MEM_Dyn 0x0400 /* Need to call sqliteFree() on Mem.z */ +#define MEM_Static 0x0800 /* Mem.z points to a static string */ +#define MEM_Ephem 0x1000 /* Mem.z points to an ephemeral string */ +#define MEM_Short 0x2000 /* Mem.z points to Mem.zShort */ -#define MEM_Dyn 0x0100 /* Need to call sqliteFree() on Mem.z */ -#define MEM_Static 0x0200 /* Mem.z points to a static string */ -#define MEM_Ephem 0x0400 /* Mem.z points to an ephemeral string */ -#define MEM_Short 0x0800 /* Mem.z points to Mem.zShort */ /* The following MEM_ value appears only in AggElem.aMem.s.flag fields. ** It indicates that the corresponding AggElem.aMem.z points to a ** aggregate function context that needs to be finalized. */ -#define MEM_AggCtx 0x1000 /* Mem.z points to an agg function context */ +#define MEM_AggCtx 0x4000 /* Mem.z points to an agg function context */ /* ** The "context" argument for a installable function. A pointer to an @@ -329,9 +330,9 @@ int sqlite3VdbeCursorMoveto(Cursor*); void sqlite3VdbePrintOp(FILE*, int, Op*); #endif int sqlite3VdbeSerialTypeLen(u64); -u64 sqlite3VdbeSerialType(const Mem *); -int sqlite3VdbeSerialPut(unsigned char *, const Mem *); -int sqlite3VdbeSerialGet(const unsigned char *, u64, Mem *); +u64 sqlite3VdbeSerialType(Mem *); +int sqlite3VdbeSerialPut(unsigned char *, Mem *); +int sqlite3VdbeSerialGet(const unsigned char *, u64, Mem *, u8 enc); int sqlite2BtreeKeyCompare(BtCursor *, const void *, int, int, int *); int sqlite3VdbeIdxKeyCompare(Cursor*, int , const unsigned char*, int*); @@ -341,3 +342,5 @@ int sqlite3VdbeKeyCompare(void*,int,const void*,int, const void*); int sqlite3VdbeRowCompare(void*,int,const void*,int, const void*); int sqlite3VdbeExec(Vdbe*); int sqlite3VdbeList(Vdbe*); +int sqlite3VdbeSetEncoding(Mem *, u8); + diff --git a/src/vdbeaux.c b/src/vdbeaux.c index 8a7795265c..9c6aa264b6 100644 --- a/src/vdbeaux.c +++ b/src/vdbeaux.c @@ -1135,7 +1135,7 @@ int sqlite3_bind_int64(sqlite3_stmt *p, int i, long long int iValue){ pVar->flags = MEM_Int; pVar->i = iValue; } - return SQLITE_OK; + return rc; } /* @@ -1199,7 +1199,13 @@ int sqlite3_bind_text16( int nData, int eCopy ){ - int flags = MEM_Str|MEM_Utf16le|MEM_Utf16be; + int flags; + + if( SQLITE3_BIGENDIAN ){ + flags = MEM_Str|MEM_Utf16be; + }else{ + flags = MEM_Str|MEM_Utf16le; + } if( zData ){ /* If nData is less than zero, measure the length of the string. @@ -1362,7 +1368,7 @@ int sqlite3VdbeCursorMoveto(Cursor *p){ /* ** Return the serial-type for the value stored in pMem. */ -u64 sqlite3VdbeSerialType(const Mem *pMem){ +u64 sqlite3VdbeSerialType(Mem *pMem){ int flags = pMem->flags; if( flags&MEM_Null ){ @@ -1380,12 +1386,13 @@ u64 sqlite3VdbeSerialType(const Mem *pMem){ return 5; } if( flags&MEM_Str ){ - /* We assume that the string is NULL-terminated. We don't store the - ** NULL-terminator - it is implied by the string storage class. - */ + u64 t; assert( pMem->n>0 ); - assert( pMem->z[pMem->n-1]=='\0' ); - return (pMem->n*2 + 11); /* (pMem->n-1)*2 + 13 */ + t = (pMem->n*2) + 13; + if( pMem->flags&MEM_Term ){ + t -= ((pMem->flags&MEM_Utf8)?2:4); + } + return t; } if( flags&MEM_Blob ){ return (pMem->n*2 + 12); @@ -1415,7 +1422,7 @@ int sqlite3VdbeSerialTypeLen(u64 serial_type){ ** buf. It is assumed that the caller has allocated sufficient space. ** Return the number of bytes written. */ -int sqlite3VdbeSerialPut(unsigned char *buf, const Mem *pMem){ +int sqlite3VdbeSerialPut(unsigned char *buf, Mem *pMem){ u64 serial_type = sqlite3VdbeSerialType(pMem); int len; @@ -1454,7 +1461,12 @@ int sqlite3VdbeSerialPut(unsigned char *buf, const Mem *pMem){ ** Deserialize the data blob pointed to by buf as serial type serial_type ** and store the result in pMem. Return the number of bytes read. */ -int sqlite3VdbeSerialGet(const unsigned char *buf, u64 serial_type, Mem *pMem){ +int sqlite3VdbeSerialGet( + const unsigned char *buf, + u64 serial_type, + Mem *pMem, + u8 enc +){ int len; assert( serial_type!=0 ); @@ -1486,7 +1498,7 @@ int sqlite3VdbeSerialGet(const unsigned char *buf, u64 serial_type, Mem *pMem){ pMem->r = *(double*)&v; }else{ pMem->flags = MEM_Int; - pMem->i = *(int*)&v; + pMem->i = *(i64*)&v; } return len; } @@ -1495,8 +1507,19 @@ int sqlite3VdbeSerialGet(const unsigned char *buf, u64 serial_type, Mem *pMem){ assert( serial_type>=12 ); len = sqlite3VdbeSerialTypeLen(serial_type); if( serial_type&0x01 ){ - pMem->flags = MEM_Str|MEM_Utf8; - pMem->n = len+1; + switch( enc ){ + case TEXT_Utf8: + pMem->flags = MEM_Str|MEM_Utf8|MEM_Term; + break; + case TEXT_Utf16le: + pMem->flags = MEM_Str|MEM_Utf16le|MEM_Term; + break; + case TEXT_Utf16be: + pMem->flags = MEM_Str|MEM_Utf16be|MEM_Term; + break; + assert(0); + } + pMem->n = len+(enc==TEXT_Utf8?1:2); }else{ pMem->flags = MEM_Blob; pMem->n = len; @@ -1516,6 +1539,9 @@ int sqlite3VdbeSerialGet(const unsigned char *buf, u64 serial_type, Mem *pMem){ memcpy(pMem->z, buf, len); if( pMem->flags&MEM_Str ){ pMem->z[len] = '\0'; + if( enc!=TEXT_Utf8 ){ + pMem->z[len+1] = '\0'; + } } return len; @@ -1635,6 +1661,7 @@ int sqlite3VdbeKeyCompare( int offset2 = 0; int i = 0; int rc = 0; + u8 enc = pKeyInfo->enc; const unsigned char *aKey1 = (const unsigned char *)pKey1; const unsigned char *aKey2 = (const unsigned char *)pKey2; @@ -1675,8 +1702,8 @@ int sqlite3VdbeKeyCompare( ** the file is corrupted. Then read the value from each key into mem1 ** and mem2 respectively. */ - offset1 += sqlite3VdbeSerialGet(&aKey1[offset1], serial_type1, &mem1); - offset2 += sqlite3VdbeSerialGet(&aKey2[offset2], serial_type2, &mem2); + offset1 += sqlite3VdbeSerialGet(&aKey1[offset1], serial_type1, &mem1, enc); + offset2 += sqlite3VdbeSerialGet(&aKey2[offset2], serial_type2, &mem2, enc); rc = sqlite3MemCompare(&mem1, &mem2, pKeyInfo->aColl[i]); if( mem1.flags&MEM_Dyn ){ @@ -1734,6 +1761,7 @@ int sqlite3VdbeRowCompare( int toffset1 = 0; int toffset2 = 0; int i; + u8 enc = pKeyInfo->enc; const unsigned char *aKey1 = (const unsigned char *)pKey1; const unsigned char *aKey2 = (const unsigned char *)pKey2; @@ -1764,8 +1792,8 @@ int sqlite3VdbeRowCompare( ** the file is corrupted. Then read the value from each key into mem1 ** and mem2 respectively. */ - offset1 += sqlite3VdbeSerialGet(&aKey1[offset1], serial_type1, &mem1); - offset2 += sqlite3VdbeSerialGet(&aKey2[offset2], serial_type2, &mem2); + offset1 += sqlite3VdbeSerialGet(&aKey1[offset1], serial_type1, &mem1, enc); + offset2 += sqlite3VdbeSerialGet(&aKey2[offset2], serial_type2, &mem2, enc); rc = sqlite3MemCompare(&mem1, &mem2, pKeyInfo->aColl[i]); if( mem1.flags&MEM_Dyn ){