diff --git a/manifest b/manifest index 3ed55a440e..7ed53265d2 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Remove\sthe\sOP_SetInsert\sopcode.\s(CVS\s1443) -D 2004-05-22T21:30:41 +C Begin\schanging\sthe\svdbe\sso\sall\sstack\svalues\suse\sthe\sdatabase\sencoding.\s(CVS\s1444) +D 2004-05-23T13:30:58 F Makefile.in ab7b0d5118e2da97bac66be8684a1034e3500f5a F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906 F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd @@ -54,8 +54,8 @@ F src/printf.c ef750e8e2398ca7e8b58be991075f08c6a7f0e53 F src/random.c eff68e3f257e05e81eae6c4d50a51eb88beb4ff3 F src/select.c 7d77a8bed7eeac23216d42fc1be006fb4352fcdc F src/shell.c 657623c2a3df126538d41842c2146cadbd52b154 -F src/sqlite.h.in 9a4c374f4030cde181593166d71bf376274ca45c -F src/sqliteInt.h 2d7b2c7e734f95c4e21513224cb7815adea86e2a +F src/sqlite.h.in 69393dbaa5b11853685ae656d1bef6a98b808bbb +F src/sqliteInt.h 823411100924138073aa542af4aae8bd5eede4a3 F src/table.c af14284fa36c8d41f6829e3f2819dce07d3e2de2 F src/tclsqlite.c f241854328ee2b06006efded270d84799159f760 F src/test1.c b5f2f9f9d866c8a586b8d47c5999d2cbefaac686 @@ -66,13 +66,13 @@ F src/test5.c 9a1f15133f6955f067c5246e564723b5f23ff221 F src/tokenize.c e7536dd31205d5afb76c1bdc832dea009c7a3847 F src/trigger.c 11afe9abfba13a2ba142944c797c952e162d117f F src/update.c 1a5e9182596f3ea8c7a141e308a3d2a7e5689fee -F src/utf.c a4640c6a1530b43b651495246349ee0a6eca3038 +F src/utf.c 441c5918ee3777cd8e9611cbb810312ed314737d F src/util.c 5cbeb452da09cfc7248de9948c15b14d840723f7 F src/vacuum.c 8734f89742f246abd91dbd3e087fc153bddbfbad -F src/vdbe.c 316491eb9b02fb6a385ffe8d5256eab40a254171 +F src/vdbe.c b40ff0912ddfae65d9a90ca38302b74a04c6ee76 F src/vdbe.h 391d5642a83af686f35c228fcd36cb4456d68f44 -F src/vdbeInt.h e3f2643c62a8d958f760fb307074d1c5d868c9ab -F src/vdbeaux.c 1e5262696aa32a50ca691a5669f6ed534655a94c +F src/vdbeInt.h 6c2444a60fc030b275dc0cff407cdaa79d84ce86 +F src/vdbeaux.c 60fa2357fc12dec128d9606e5b84cec836aa7e40 F 
src/where.c efe5d25fe18cd7381722457898cd863e84097a0c F test/all.test 569a92a8ee88f5300c057cc4a8f50fbbc69a3242 F test/attach.test cb9b884344e6cfa5e165965d5b1adea679a24c83 @@ -129,7 +129,7 @@ F test/pager2.test 7ff175a28484fd324df9315dfe35f6fb159910ec F test/pragma.test e763be8238c8a5a0cd8b75e8eec70b957da6081b F test/printf.test 46b3d07d59d871d0831b4a657f6dfcafe0574850 F test/progress.test 701b6115c2613128ececdfe1398a1bd0e1a4cfb3 x -F test/quick.test 4e4b45ac941c1d8b4c29fb66b119ed5362c368e8 +F test/quick.test f5d81aa4d609a2a47e9000a43c58ae78b359b868 F test/quote.test 08f23385c685d3dc7914ec760d492cacea7f6e3d F test/rowid.test 863e6e75878cccf03d166fe52023f20e09508683 F test/select1.test 3bfcccd2eadcddbb07f1f5da6550aee8484ea4fb @@ -202,7 +202,7 @@ F www/sqlite.tcl 3c83b08cf9f18aa2d69453ff441a36c40e431604 F www/tclsqlite.tcl b9271d44dcf147a93c98f8ecf28c927307abd6da F www/vdbe.tcl 9b9095d4495f37697fd1935d10e14c6015e80aa1 F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4 -P 4159ef235d780ec941677439e77c6fa96e24997c -R 5d98176ba2ec77b07a87ae923b3b9934 -U drh -Z 9374b217aab3cb0d12f84dd869677fb1 +P 18e690e405710c9a8010340c01754bbfa3231fe9 +R a90f19b4bed57bb3a6862d24faf34718 +U danielk1977 +Z 72938d5d6b2f4afcbbfc88e30cdfc1f0 diff --git a/manifest.uuid b/manifest.uuid index 8f8f6c4141..d76215f2da 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -18e690e405710c9a8010340c01754bbfa3231fe9 \ No newline at end of file +f47de3a933b51b37629a0ca2e492a534a12e7339 \ No newline at end of file diff --git a/src/sqlite.h.in b/src/sqlite.h.in index ebc976ed32..c8f0717fce 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -12,7 +12,7 @@ ** This header file defines the interface that the SQLite library ** presents to client programs. 
** -** @(#) $Id: sqlite.h.in,v 1.70 2004/05/22 10:33:04 danielk1977 Exp $ +** @(#) $Id: sqlite.h.in,v 1.71 2004/05/23 13:30:58 danielk1977 Exp $ */ #ifndef _SQLITE_H_ #define _SQLITE_H_ @@ -1235,7 +1235,7 @@ int sqlite3_open16( ** error code, or before sqlite3_step() has been called on a ** compiled SQL statement, this routine returns zero. */ -int sqlite3_value_count(sqlite3_stmt *pStmt); +int sqlite3_data_count(sqlite3_stmt *pStmt); #define SQLITE3_INTEGER 1 #define SQLITE3_FLOAT 2 @@ -1340,6 +1340,16 @@ long long int sqlite3_column_int(sqlite3_stmt*,int); */ double sqlite3_column_float(sqlite3_stmt*,int); +typedef struct Mem sqlite3_value; + +int sqlite3_value_type(sqlite3_value*); +int sqlite3_value_bytes(sqlite3_value*); +int sqlite3_value_bytes16(sqlite3_value*); +const unsigned char *sqlite3_value_data(sqlite3_value*); +const void *sqlite3_value_data16(sqlite3_value*); +long long int sqlite3_value_int(sqlite3_value*); +double sqlite3_value_float(sqlite3_value*); + #ifdef __cplusplus } /* End of the 'extern "C"' block */ #endif diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 5c233e9f31..18859cb8b6 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -11,7 +11,7 @@ ************************************************************************* ** Internal interface definitions for SQLite. ** -** @(#) $Id: sqliteInt.h,v 1.246 2004/05/22 17:41:59 drh Exp $ +** @(#) $Id: sqliteInt.h,v 1.247 2004/05/23 13:30:58 danielk1977 Exp $ */ #include "config.h" #include "sqlite.h" @@ -329,7 +329,7 @@ struct Db { #define TEXT_Utf8 1 #define TEXT_Utf16le 2 #define TEXT_Utf16be 3 -#define TEXT_Utf16 4 +/* #define TEXT_Utf16 4 */ /* ** Each database is an instance of the following structure. 
@@ -1371,3 +1371,4 @@ char sqlite3ExprAffinity(Expr *pExpr); int sqlite3atoi64(const char*, i64*); void sqlite3Error(sqlite *, int, const char*,...); int sqlite3utfTranslate(const void *, int , u8 , void **, int *, u8); +u8 sqlite3UtfReadBom(const void *zData, int nData); diff --git a/src/utf.c b/src/utf.c index 5548de258d..cb093144eb 100644 --- a/src/utf.c +++ b/src/utf.c @@ -12,7 +12,7 @@ ** This file contains routines used to translate between UTF-8, ** UTF-16, UTF-16BE, and UTF-16LE. ** -** $Id: utf.c,v 1.8 2004/05/22 17:41:59 drh Exp $ +** $Id: utf.c,v 1.9 2004/05/23 13:30:58 danielk1977 Exp $ ** ** Notes on UTF-8: ** @@ -122,6 +122,27 @@ static int readUtf16Bom(UtfString *pStr, int big_endian){ return big_endian; } +/* +** zData is a UTF-16 encoded string, nData bytes in length. This routine +** checks for a byte-order mark at the start of zData. If zData is NULL or +** no byte-order mark is found, 0 is returned. Otherwise TEXT_Utf16be or +** TEXT_Utf16le is returned, depending on whether the BOM indicates that +** the text is big-endian or little-endian. +*/ +u8 sqlite3UtfReadBom(const void *zData, int nData){ + if( zData && (nData<0 || nData>1) ){ /* nData<0: length is not checked */ + u8 b1 = ((const u8 *)zData)[0]; + u8 b2 = ((const u8 *)zData)[1]; + if( b1==0xFE && b2==0xFF ){ + return TEXT_Utf16be; + } + if( b1==0xFF && b2==0xFE ){ + return TEXT_Utf16le; + } + } + return 0; +} + /* ** Read a single unicode character from the UTF-8 encoded string *pStr. The diff --git a/src/vdbe.c b/src/vdbe.c index 602c238b6f..2954b287eb 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -43,7 +43,7 @@ ** in this file for details. If in doubt, do not deviate from existing ** commenting and indentation practices when changing or adding code. 
** -** $Id: vdbe.c,v 1.320 2004/05/22 21:30:41 drh Exp $ +** $Id: vdbe.c,v 1.321 2004/05/23 13:30:58 danielk1977 Exp $ */ #include "sqliteInt.h" #include "os.h" @@ -194,6 +194,14 @@ static int encToFlags(u8 enc){ assert(0); } +/* +** Set the encoding flags of memory cell "pMem" to the correct values +** for the database encoding "enc" (one of TEXT_Utf8, TEXT_Utf16le or +** TEXT_Utf16be). +*/ +#define SetEncodingFlags(pMem, enc) (pMem->flags = \ +(pMem->flags & ~(MEM_Utf8|MEM_Utf16le|MEM_Utf16be)) | encToFlags(enc)) + /* ** If pMem is a string object, this routine sets the encoding of the string ** (to one of UTF-8 or UTF16) and whether or not the string is @@ -297,20 +305,6 @@ int SetEncoding(Mem *pMem, int flags){ return SQLITE_OK; } -int sqlite3VdbeSetEncoding(Mem *pMem, u8 enc){ - switch( enc ){ - case TEXT_Utf8: - return SetEncoding(pMem, MEM_Utf8); - case TEXT_Utf16le: - return SetEncoding(pMem, MEM_Utf16le); - case TEXT_Utf16be: - return SetEncoding(pMem, MEM_Utf16be); - default: - assert(0); - } - return SQLITE_INTERNAL; -} - /* ** Convert the given stack entity into a string that has been obtained ** from sqliteMalloc(). This is different from Stringify() above in that @@ -387,7 +381,7 @@ int sqlite3_step( if( rc==SQLITE_DONE || rc==SQLITE_ROW ){ int i; int cols = sqlite3_column_count(pStmt) * (pazColName?1:0); - int vals = sqlite3_value_count(pStmt) * (pazValue?1:0); + int vals = sqlite3_data_count(pStmt) * (pazValue?1:0); /* Temporary memory leak */ if( cols ) *pazColName = sqliteMalloc(sizeof(char *)*cols * 2); @@ -454,7 +448,7 @@ int sqlite3_column_count(sqlite3_stmt *pStmt){ ** Return the number of values available from the current row of the ** currently executing statement pStmt. 
*/ -int sqlite3_value_count(sqlite3_stmt *pStmt){ +int sqlite3_data_count(sqlite3_stmt *pStmt){ Vdbe *pVm = (Vdbe *)pStmt; if( !pVm->resOnStack ) return 0; return pVm->nResColumn; @@ -469,22 +463,37 @@ const unsigned char *sqlite3_column_data(sqlite3_stmt *pStmt, int i){ Vdbe *pVm = (Vdbe *)pStmt; Mem *pVal; - vals = sqlite3_value_count(pStmt); + vals = sqlite3_data_count(pStmt); if( i>=vals || i<0 ){ sqlite3Error(pVm->db, SQLITE_RANGE, 0); return 0; } pVal = &pVm->pTos[(1-vals)+i]; + SetEncodingFlags(pVal, pVm->db->enc); + return sqlite3_value_data((sqlite3_value *)pVal); +} + +const unsigned char *sqlite3_value_data(sqlite3_value* pVal){ if( pVal->flags&MEM_Null ){ return 0; } - if( !(pVal->flags&MEM_Blob) ){ - Stringify(pVal); - SetEncoding(pVal, MEM_Utf8|MEM_Term); + if( pVal->flags&MEM_Str && !(pVal->flags&MEM_Utf8) ){ + char *z = 0; + int n; + u8 enc = flagsToEnc(pVal->flags); + if( sqlite3utfTranslate(pVal->z,pVal->n,enc,(void **)&z,&n,TEXT_Utf8) ){ + return 0; + } + Release(pVal); + pVal->z = z; + pVal->n = n; + SetEncodingFlags(pVal, TEXT_Utf8); + }else{ + Stringify(pVal); + } } - return pVal->z; } @@ -497,7 +506,7 @@ const void *sqlite3_column_data16(sqlite3_stmt *pStmt, int i){ Vdbe *pVm = (Vdbe *)pStmt; Mem *pVal; - vals = sqlite3_value_count(pStmt); + vals = sqlite3_data_count(pStmt); if( i>=vals || i<0 ){ sqlite3Error(pVm->db, SQLITE_RANGE, 0); return 0; @@ -511,9 +520,9 @@ const void *sqlite3_column_data16(sqlite3_stmt *pStmt, int i){ if( !(pVal->flags&MEM_Blob) ){ Stringify(pVal); if( SQLITE3_BIGENDIAN ){ - SetEncoding(pVal, MEM_Utf16be|MEM_Term); + /* SetEncoding(pVal, MEM_Utf16be|MEM_Term); */ }else{ - SetEncoding(pVal, MEM_Utf16le|MEM_Term); + /* SetEncoding(pVal, MEM_Utf16le|MEM_Term); */ } } @@ -528,7 +537,7 @@ int sqlite3_column_bytes(sqlite3_stmt *pStmt, int i){ Vdbe *pVm = (Vdbe *)pStmt; if( sqlite3_column_data(pStmt, i) ){ - int vals = sqlite3_value_count(pStmt); + int vals = sqlite3_data_count(pStmt); return pVm->pTos[(1-vals)+i].n; } 
return 0; @@ -542,7 +551,7 @@ int sqlite3_column_bytes16(sqlite3_stmt *pStmt, int i){ Vdbe *pVm = (Vdbe *)pStmt; if( sqlite3_column_data16(pStmt, i) ){ - int vals = sqlite3_value_count(pStmt); + int vals = sqlite3_data_count(pStmt); return pVm->pTos[(1-vals)+i].n; } return 0; @@ -557,7 +566,7 @@ long long int sqlite3_column_int(sqlite3_stmt *pStmt, int i){ Vdbe *pVm = (Vdbe *)pStmt; Mem *pVal; - vals = sqlite3_value_count(pStmt); + vals = sqlite3_data_count(pStmt); if( i>=vals || i<0 ){ sqlite3Error(pVm->db, SQLITE_RANGE, 0); return 0; @@ -577,7 +586,7 @@ double sqlite3_column_float(sqlite3_stmt *pStmt, int i){ Vdbe *pVm = (Vdbe *)pStmt; Mem *pVal; - vals = sqlite3_value_count(pStmt); + vals = sqlite3_data_count(pStmt); if( i>=vals || i<0 ){ sqlite3Error(pVm->db, SQLITE_RANGE, 0); return 0; @@ -612,7 +621,7 @@ int sqlite3_column_type(sqlite3_stmt *pStmt, int i){ Vdbe *p = (Vdbe *)pStmt; int f; - vals = sqlite3_value_count(pStmt); + vals = sqlite3_data_count(pStmt); if( i>=vals || i<0 ){ sqlite3Error(p->db, SQLITE_RANGE, 0); return 0; @@ -1321,60 +1330,67 @@ case OP_Halt: { } } +/* Opcode: String * * P3 +** +** The string value P3 is pushed onto the stack. If P3==0 then a +** NULL is pushed onto the stack. +*/ +/* Opcode: Real * * P3 +** +** The string value P3 is converted to a real and pushed on to the stack. +*/ /* Opcode: Integer P1 * P3 ** ** The integer value P1 is pushed onto the stack. If P3 is not zero ** then it is assumed to be a string representation of the same integer. ** If P1 is zero and P3 is not zero, then the value is derived from P3. */ -case OP_Integer: { - pTos++; - pTos->i = pOp->p1; - pTos->flags = MEM_Int; - if( pOp->p3 ){ - pTos->z = pOp->p3; - pTos->flags |= MEM_Utf8 | MEM_Str | MEM_Static | MEM_Term; - pTos->n = strlen(pOp->p3)+1; - if( pTos->i==0 ){ - sqlite3GetInt64(pTos->z, &pTos->i); - } - } - break; -} - -/* Opcode: String * * P3 -** -** The string value P3 is pushed onto the stack. If P3==0 then a -** NULL is pushed onto the stack. 
-*/ +case OP_Integer: +case OP_Real: case OP_String: { char *z = pOp->p3; + u8 op = pOp->opcode; + pTos++; - if( z==0 ){ - pTos->flags = MEM_Null; - }else{ - pTos->z = z; - pTos->n = strlen(z) + 1; - pTos->flags = MEM_Str | MEM_Static | MEM_Utf8 | MEM_Term; + pTos->flags = 0; + + /* If this is an OP_Real or OP_Integer opcode, set the pTos->r or pTos->i + ** values respectively. + */ + if( op==OP_Real ){ + assert( z ); + assert( sqlite3IsNumber(z, 0) ); + pTos->r = sqlite3AtoF(z, 0); + pTos->flags = MEM_Real; + }else if( op==OP_Integer ){ + pTos->flags = MEM_Int; + pTos->i = pOp->p1; + if( pTos->i==0 && pOp->p3 ){ + sqlite3GetInt64(pOp->p3, &pTos->i); + } } - break; -} -/* Opcode: Real * * P3 -** -** The string value P3 is converted to a real and pushed on to the stack. -*/ -case OP_Real: { - char *z = pOp->p3; + if( z ){ + /* FIX ME: For now the code in expr.c always puts UTF-8 in P3. It + ** should transform text to the native encoding before doing so. + */ + if( db->enc!=TEXT_Utf8 ){ + rc = sqlite3utfTranslate(z, -1, TEXT_Utf8, (void **)&pTos->z, + &pTos->n, db->enc); + if( rc!=SQLITE_OK ){ + assert( !pTos->z ); + goto abort_due_to_error; + } + pTos->flags |= MEM_Str | MEM_Dyn | MEM_Term; + }else{ + pTos->z = z; + pTos->n = strlen(z) + 1; + pTos->flags |= MEM_Str | MEM_Static | MEM_Term; + } + }else if( op==OP_String ){ + pTos->flags = MEM_Null; + } - assert( z ); - assert( sqlite3IsNumber(z, 0) ); - - pTos++; - pTos->r = sqlite3AtoF(z, 0); - pTos->z = z; - pTos->n = strlen(z)+1; - pTos->flags = MEM_Real|MEM_Str|MEM_Static|MEM_Utf8|MEM_Term; break; } @@ -1389,27 +1405,66 @@ case OP_Real: { */ case OP_Variable: { int j = pOp->p1 - 1; - Mem *pVar; assert( j>=0 && j<p->nVar ); - /* Ensure the variable string (if it is a string) is UTF-8 encoded and - ** nul terminated. Do the transformation on the variable before it - ** is copied onto the stack, in case it is used again before this VDBE is - ** finalized. 
- */ - pVar = &p->apVar[j]; - SetEncoding(pVar, MEM_Utf8|MEM_Term); - - /* Copy the value in pVar to the top of the stack. If pVar is a string or - ** a blob just store a pointer to the same memory, do not make a copy. - */ pTos++; - memcpy(pTos, pVar, sizeof(*pVar)-NBFS); + memcpy(pTos, &p->apVar[j], sizeof(*pTos)-NBFS); if( pTos->flags&(MEM_Str|MEM_Blob) ){ pTos->flags &= ~(MEM_Dyn|MEM_Ephem|MEM_Short); pTos->flags |= MEM_Static; } + break; +} +/* Opcode: Utf16le_8 * * * +** +** The element on the top of the stack must be a little-endian UTF-16 +** encoded string. It is translated in-place to UTF-8. +*/ +case OP_Utf16le_8: { + rc = SQLITE_INTERNAL; + break; +} + +/* Opcode: Utf16be_8 * * * +** +** The element on the top of the stack must be a big-endian UTF-16 +** encoded string. It is translated in-place to UTF-8. +*/ +case OP_Utf16be_8: { + rc = SQLITE_INTERNAL; + break; +} + +/* Opcode: Utf8_16be * * * +** +** The element on the top of the stack must be a UTF-8 encoded +** string. It is translated to big-endian UTF-16. +*/ +case OP_Utf8_16be: { + rc = SQLITE_INTERNAL; + break; +} + +/* Opcode: Utf8_16le * * * +** +** The element on the top of the stack must be a UTF-8 encoded +** string. It is translated to little-endian UTF-16. +*/ +case OP_Utf8_16le: { + rc = SQLITE_INTERNAL; + break; +} + +/* +** Opcode: UtfSwab +** +** The element on the top of the stack must be an UTF-16 encoded +** string. Every second byte is exchanged, so as to translate +** the string from little-endian to big-endian or vice versa. 
+*/ +case OP_UtfSwab: { + rc = SQLITE_INTERNAL; break; } @@ -2425,7 +2480,6 @@ case OP_Column: { off += off2; sqlite3VdbeSerialGet(&zRec[off], colType, pTos, p->db->enc); - rc = SetEncoding(pTos, MEM_Utf8|MEM_Term); if( rc!=SQLITE_OK ){ goto abort_due_to_error; } @@ -2540,7 +2594,6 @@ case OP_Column: { zData = sMem.z; } sqlite3VdbeSerialGet(zData, pC->aType[p2], pTos, p->db->enc); - rc = SetEncoding(pTos, MEM_Utf8|MEM_Term); if( rc!=SQLITE_OK ){ goto abort_due_to_error; } @@ -2612,7 +2665,6 @@ case OP_MakeRecord: { if( zAffinity ){ applyAffinity(pRec, zAffinity[pRec-pData0]); } - SetEncoding(pRec, encToFlags(p->db->enc)); serial_type = sqlite3VdbeSerialType(pRec); nBytes += sqlite3VdbeSerialTypeLen(serial_type); nBytes += sqlite3VarintLen(serial_type); @@ -2736,7 +2788,6 @@ case OP_MakeIdxKey: { if( pRec->flags&MEM_Null ){ containsNull = 1; } - SetEncoding(pRec, encToFlags(p->db->enc)); serial_type = sqlite3VdbeSerialType(pRec); nByte += sqlite3VarintLen(serial_type); nByte += sqlite3VdbeSerialTypeLen(serial_type); @@ -3951,7 +4002,6 @@ case OP_IdxColumn: { pTos++; sqlite3VdbeSerialGet(&zData[len], serial_type, pTos, p->db->enc); - SetEncoding(pTos, MEM_Utf8|MEM_Term); if( freeZData ){ sqliteFree(zData); } diff --git a/src/vdbeInt.h b/src/vdbeInt.h index 5797423945..23d0c6c5ab 100644 --- a/src/vdbeInt.h +++ b/src/vdbeInt.h @@ -149,9 +149,6 @@ typedef struct Mem Mem; #define MEM_Blob 0x0010 /* Value is a BLOB */ #define MEM_Struct 0x0020 /* Value is some kind of struct */ -#define MEM_Utf8 0x0040 /* String uses UTF-8 encoding */ -#define MEM_Utf16be 0x0080 /* String uses UTF-16 big-endian */ -#define MEM_Utf16le 0x0100 /* String uses UTF-16 little-endian */ #define MEM_Term 0x0200 /* String has a nul terminator character */ #define MEM_Dyn 0x0400 /* Need to call sqliteFree() on Mem.z */ @@ -159,6 +156,34 @@ typedef struct Mem Mem; #define MEM_Ephem 0x1000 /* Mem.z points to an ephemeral string */ #define MEM_Short 0x2000 /* Mem.z points to Mem.zShort */ +/* 
Internally, all strings manipulated by the VDBE are encoded using the +** native encoding for the main database. Therefore the following three +** flags, which describe the text encoding of the string if the MEM_Str +** flag is true, are not generally valid for Mem* objects handled by the +** VDBE. +** +** When a user-defined function is called (see OP_Function), the Mem* +** objects that store the argument values for the function call are +** passed to the user-defined function routine cast to sqlite3_value*. +** The user routine may then call sqlite3_value_data() or +** sqlite3_value_data16() to request a UTF-8 or UTF-16 string. If the +** string representation currently stored in Mem.z is not the requested +** encoding, then a translation occurs. To keep track of things, the +** MEM_Utf* flags are set correctly for the database encoding before a +** user-routine is called, and kept up to date if any translations occur +** thereafter. +** +** When sqlite3_step() returns SQLITE_ROW, indicating that a row of data +** is ready for processing by the caller, the data values are stored +** internally as Mem* objects. Before sqlite3_step() returns, the MEM_Utf* +** flags are set correctly for the database encoding. A translation may +** take place if the user requests a non-native encoding via +** sqlite3_column_data() or sqlite3_column_data16(). If this occurs, then +** the MEM_Utf* flags are updated accordingly. +*/ +#define MEM_Utf8 0x0040 /* String uses UTF-8 encoding */ +#define MEM_Utf16be 0x0080 /* String uses UTF-16 big-endian */ +#define MEM_Utf16le 0x0100 /* String uses UTF-16 little-endian */ /* The following MEM_ value appears only in AggElem.aMem.s.flag fields. 
** It indicates that the corresponding AggElem.aMem.z points to a diff --git a/src/vdbeaux.c b/src/vdbeaux.c index abc278a8ea..4e40f2e54f 100644 --- a/src/vdbeaux.c +++ b/src/vdbeaux.c @@ -1101,7 +1101,7 @@ static int vdbeBindBlob( const char *zVal, /* Pointer to blob of data */ int bytes, /* Number of bytes to copy */ int copy, /* True to copy the memory, false to copy a pointer */ - int flags /* Valid combination of MEM_Blob, MEM_Str, MEM_UtfXX */ + int flags /* Valid combination of MEM_Blob, MEM_Str, MEM_Term */ ){ Mem *pVar; int rc; @@ -1207,38 +1207,80 @@ int sqlite3_bind_text( ** Bind a UTF-16 text value to an SQL statement variable. */ int sqlite3_bind_text16( - sqlite3_stmt *p, + sqlite3_stmt *pStmt, int i, const void *zData, int nData, int eCopy ){ + Vdbe *p = (Vdbe *)pStmt; + Mem *pVar; + u8 db_enc = p->db->enc; /* Text encoding of the database */ + u8 txt_enc; + int null_term = 0; + - int flags; - - if( SQLITE3_BIGENDIAN ){ - flags = MEM_Str|MEM_Utf16be; + int rc; + + rc = vdbeUnbind(p, i); + if( rc!=SQLITE_OK ){ + return rc; + } + pVar = &p->apVar[i-1]; + + if( db_enc==TEXT_Utf8 ){ + /* If the database encoding is UTF-8, then do a translation. */ + pVar->z = sqlite3utf16to8(zData, nData, SQLITE3_BIGENDIAN); + if( !pVar->z ) return SQLITE_NOMEM; + pVar->n = strlen(pVar->z)+1; + pVar->flags = MEM_Str|MEM_Term|MEM_Dyn; + return SQLITE_OK; + } + + /* There may or may not be a byte order mark at the start of the UTF-16. + ** Either way set 'txt_enc' to the TEXT_Utf16* value indicating the + ** actual byte order used by this string. If the string does happen + ** to contain a BOM, then move zData so that it points to the first + ** byte after the BOM and shrink an explicit nData by the BOM's two bytes. + */ + txt_enc = sqlite3UtfReadBom(zData, nData); + if( txt_enc ){ + zData = (void *)(((u8 *)zData) + 2); + if( nData>0 ) nData -= 2; /* the BOM bytes are not part of the text */ }else{ - flags = MEM_Str|MEM_Utf16le; + txt_enc = SQLITE3_BIGENDIAN?TEXT_Utf16be:TEXT_Utf16le; } - if( zData ){ - /* If nData is less than zero, measure the length of the string. - ** manually. 
In this case the variable will always be null terminated. + if( nData<0 ){ + nData = sqlite3utf16ByteLen(zData, -1) + 2; + null_term = 1; + }else if( nData>1 && !((u8*)zData)[nData-1] && !((u8*)zData)[nData-2] ){ + null_term = 1; + } + + if( db_enc==txt_enc && !eCopy ){ + /* If the byte order of the string matches the byte order of the + ** database and the eCopy parameter is not set, then the string can + ** be used without making a copy. */ - if( nData<0 ){ - nData = sqlite3utf16ByteLen(zData, -1) + 2; - flags |= MEM_Term; + pVar->z = (char *)zData; + pVar->n = nData; + pVar->flags = MEM_Str|MEM_Static|(null_term?MEM_Term:0); + }else{ + /* Make a copy. Swap the byte order if required */ + pVar->n = nData + (null_term?0:2); + pVar->z = sqliteMalloc(pVar->n); + if( !pVar->z ) return SQLITE_NOMEM; /* do not write through a failed allocation */ + pVar->flags = MEM_Str|MEM_Dyn|MEM_Term; + if( db_enc==txt_enc ){ + memcpy(pVar->z, zData, nData); }else{ - /* If nData is greater than zero, check if the final character appears - ** to be a terminator. - */ - if( !(((u8 *)zData)[nData-1]) && !(((u8 *)zData)[nData-2]) ){ - flags |= MEM_Term; - } + swab(zData, pVar->z, nData); } - } - - return vdbeBindBlob((Vdbe *)p, i, zData, nData, eCopy, flags); + pVar->z[pVar->n-1] = '\0'; + pVar->z[pVar->n-2] = '\0'; + } + + return SQLITE_OK; } /* @@ -1400,13 +1442,18 @@ u64 sqlite3VdbeSerialType(Mem *pMem){ return 5; } if( flags&MEM_Str ){ - u64 t; - assert( pMem->n>0 ); - t = (pMem->n*2) + 13; + int n = pMem->n; + assert( n>=0 ); if( pMem->flags&MEM_Term ){ - t -= ((pMem->flags&MEM_Utf8)?2:4); + /* If the nul terminated flag is set we have to subtract something + ** from the serial-type. Depending on the encoding there could be + ** one or two 0x00 bytes at the end of the string. 
Check for these + ** and subtract 2 from the serial type for each one found. + */ + if( n>0 && !pMem->z[n-1] ) n--; + if( n>0 && !pMem->z[n-1] ) n--; } - return t; + return ((n*2) + 13); } if( flags&MEM_Blob ){ return (pMem->n*2 + 12); @@ -1476,10 +1523,10 @@ int sqlite3VdbeSerialPut(unsigned char *buf, Mem *pMem){ ** and store the result in pMem. Return the number of bytes read. */ int sqlite3VdbeSerialGet( - const unsigned char *buf, - u64 serial_type, - Mem *pMem, - u8 enc + const unsigned char *buf, /* Buffer to deserialize from */ + u64 serial_type, /* Serial type to deserialize */ + Mem *pMem, /* Memory cell to write value into */ + u8 enc /* Text encoding. Used to determine nul term. character */ ){ int len; diff --git a/test/quick.test b/test/quick.test index 7107aa8086..be7f24b70d 100644 --- a/test/quick.test +++ b/test/quick.test @@ -10,7 +10,7 @@ #*********************************************************************** # This file runs all tests. # -# $Id: quick.test,v 1.17 2004/05/20 03:30:11 drh Exp $ +# $Id: quick.test,v 1.18 2004/05/23 13:30:59 danielk1977 Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl @@ -40,6 +40,7 @@ lappend EXCLUDE version.test ;# uses the btree_meta API (not updated) # Some tests fail in these file as a result of the partial manifest types # implementation. lappend EXCLUDE capi2.test +lappend EXCLUDE enc2.test if {[sqlite -has-codec]} {