diff --git a/manifest b/manifest index d04a239476..3236af57f9 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\ssqlite3_collation_needed()\sAPI\sand\sfix\ssome\serror\shandling\scases\ninvolving\sunknown\scollation\ssequences.\s(CVS\s1564) -D 2004-06-10T10:51:53 +C Correctly\shandle\sthe\ssituation\swhere\sa\scollation\ssequence\sis\savailable,\sbut\nnot\sin\sthe\spreferred\sencoding.\s(CVS\s1565) +D 2004-06-10T14:01:08 F Makefile.in ab7b0d5118e2da97bac66be8684a1034e3500f5a F Makefile.linux-gcc a9e5a0d309fa7c38e7c14d3ecf7690879d3a5457 F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd @@ -27,7 +27,7 @@ F src/attach.c 93b8ecec4a8d7b4e9f2479e2327d90c9d01765e8 F src/auth.c 5c2f0bea4729c98c2be3b69d6b466fc51448fe79 F src/btree.c 281af87aa117de024f5b6c2728a2339cba9ef584 F src/btree.h 589427ac13bb544d298cd99726e2572a6fe4bdaa -F src/build.c 4b1a23d919fe01549702f7f1bfe7f8b656e77a17 +F src/build.c b36b62f49aea7d258cb804999dcc8650e4d79464 F src/date.c 8e6fa3173386fb29fdef012ee08a853c1e9908b2 F src/delete.c 911221aadb35d610c84fadb32e71c52990827e58 F src/encode.c a876af473d1d636faa3dca51c7571f2e007eea37 @@ -56,10 +56,10 @@ F src/random.c eff68e3f257e05e81eae6c4d50a51eb88beb4ff3 F src/select.c 6cb407796dde0e8f27450ead68856eb9f8188789 F src/shell.c ca519519dcbbc582f6d88f7d0e7583b857fd3469 F src/sqlite.h.in 2b6afe1de6935d3dfbd6042f46a62f1b7c3b3992 -F src/sqliteInt.h 6be535d420f99c57f29f13c3c2d6a3497432b366 +F src/sqliteInt.h e8e641bec4d7806023ce8192a64234d3599c5fc0 F src/table.c af14284fa36c8d41f6829e3f2819dce07d3e2de2 F src/tclsqlite.c e974c0b2479ed37334aeb268de331e0a1b21b5a8 -F src/test1.c f78d6ac0675bc5db48dac9c5379c965bdadb9113 +F src/test1.c 5f5c0773df1091cc02ddf6608a8f6e0c65940a56 F src/test2.c 05f810c90cf6262d5f352860e87d41a3f34207f9 F src/test3.c beafd0ccf7b9ae784744be1b1e66ffe8f64c25da F src/test4.c a921a69821fd30209589228e64f94e9f715b6fe2 @@ -73,9 +73,9 @@ F src/vacuum.c b921eb778842592e1fb48a9d4cef7e861103878f F src/vdbe.c 90e0e6bdbdf9b77c66f2500374b5784d30c323fa F src/vdbe.h 46f74444a213129bc4b5ce40124dd8ed613b0cde F src/vdbeInt.h d41605853332bdbd600d7ecd60e1f54bbaea174e -F src/vdbeapi.c 4ac95766b0515538037a7aec172ed26142f97cf9 +F src/vdbeapi.c bcf5821ed09070d586898374b905861c4dd73d0b F src/vdbeaux.c 73764dadcdbf79aa2d948f863eae07b18589e663 -F src/vdbemem.c 5b2fab8b5a830e5204413b808c4a2d8335189f21 +F src/vdbemem.c b1599f5d24131107a21a54e618e372e1252de958 F src/where.c dda77afaa593cd54e5955ec433076de18faf62f6 F test/all.test 569a92a8ee88f5300c057cc4a8f50fbbc69a3242 F test/attach.test aed659e52635662bcd5069599aaca823533edf5a @@ -103,7 +103,7 @@ F test/crashtest1.c 09c1c7d728ccf4feb9e481671e29dda5669bbcc2 F test/date.test aed5030482ebc02bd8d386c6c86a29f694ab068d F test/delete.test ac14bd6df5f1581d646eebc013d6b844a885dcf6 F test/enc.test a55481d45ff493804e8d88357feb4642fc50a6b2 -F test/enc2.test 28b61a098dd571b06147fe9f857489edba4e405d +F test/enc2.test 1d469f58ee7f187bf06e11bd72a12bdea6362b2f F test/expr.test 521588701dae8cf5aa2b8a18c5c897711f754332 F test/fkey1.test d65c824459916249bee501532d6154ddab0b5db7 F test/func.test 9816fbed0a5e87e00f4fc88b4cdcd638abc524c4 @@ -219,7 +219,7 @@ F www/support.tcl 1801397edd271cc39a2aadd54e701184b5181248 F www/tclsqlite.tcl 19191cf2a1010eaeff74c51d83fd5f5a4d899075 F www/vdbe.tcl 59288db1ac5c0616296b26dce071c36cb611dfe9 F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4 -P 518d82d3b1ab996d675f45c94d740c98578a04a6 -R 9cc8eb0d8d516e56f0a005459d5bbc05 +P 67500546ea24fd7a30348960c98cd257dbfa965f +R 393c5de920a400aa587d8f21dc793fbe U danielk1977 -Z 25a8aff272a87359157d326e3bffc5a6 +Z 3447cf118fd13bbb0c99c8f654b072b2 diff --git a/manifest.uuid b/manifest.uuid index e2a218a437..cf59fef4f7 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -67500546ea24fd7a30348960c98cd257dbfa965f \ No newline at end of file +49ab4794e1b5be5cbb3b87a65477659762487cf8 \ No newline at end of file diff --git a/src/build.c b/src/build.c index 1f09c250a9..09aa751b7d 100644 --- a/src/build.c +++ b/src/build.c @@ -23,7 +23,7 @@ ** ROLLBACK ** PRAGMA ** -** $Id: build.c,v 1.215 2004/06/10 10:50:08 danielk1977 Exp $ +** $Id: build.c,v 1.216 2004/06/10 14:01:08 danielk1977 Exp $ */ #include "sqliteInt.h" #include @@ -912,10 +912,10 @@ CollSeq *sqlite3FindCollSeq( case TEXT_Utf8: break; case TEXT_Utf16le: - pColl = &pColl[2]; + pColl = &pColl[1]; break; case TEXT_Utf16be: - pColl = &pColl[1]; + pColl = &pColl[2]; break; default: assert(!"Cannot happen"); diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 57e139bce9..27fd9bdf27 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -11,7 +11,7 @@ ************************************************************************* ** Internal interface definitions for SQLite. ** -** @(#) $Id: sqliteInt.h,v 1.280 2004/06/10 10:50:32 danielk1977 Exp $ +** @(#) $Id: sqliteInt.h,v 1.281 2004/06/10 14:01:08 danielk1977 Exp $ */ #include "config.h" #include "sqlite3.h" @@ -1401,3 +1401,10 @@ CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char *zName, int nName); CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr); int sqlite3CheckCollSeq(Parse *, CollSeq *); int sqlite3CheckIndexCollSeq(Parse *, Index *); + +const void *sqlite3ValueText(sqlite3_value*, u8); +int sqlite3ValueBytes(sqlite3_value*, u8); +void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8); +void sqlite3ValueFree(sqlite3_value*); +sqlite3_value *sqlite3ValueNew(); + diff --git a/src/test1.c b/src/test1.c index 911e6d12e7..39a3f53778 100644 --- a/src/test1.c +++ b/src/test1.c @@ -13,7 +13,7 @@ ** is not included in the SQLite library. It is used for automated ** testing of the SQLite library. ** -** $Id: test1.c,v 1.74 2004/06/09 17:37:28 drh Exp $ +** $Id: test1.c,v 1.75 2004/06/10 14:01:08 danielk1977 Exp $ */ #include "sqliteInt.h" #include "tcl.h" @@ -860,6 +860,108 @@ static int test_bind( return TCL_OK; } + +/* +** Usage: add_test_collate +** +** This function is used to test that SQLite selects the correct collation +** sequence callback when multiple versions (for different text encodings) +** are available. +** +** Calling this routine registers the collation sequence "test_collate" +** with database handle . The second argument must be a list of three +** boolean values. If the first is true, then a version of test_collate is +** registered for UTF-8, if the second is true, a version is registered for +** UTF-16le, if the third is true, a UTF-16be version is available. +** Previous versions of test_collate are deleted. +** +** The collation sequence test_collate is implemented by calling the +** following TCL script: +** +** "test_collate " +** +** The and are the two values being compared, encoded in UTF-8. +** The parameter is the encoding of the collation function that +** SQLite selected to call. The TCL test script implements the +** "test_collate" proc. +** +** Note that this will only work with one intepreter at a time, as the +** interp pointer to use when evaluating the TCL script is stored in +** pTestCollateInterp. +*/ +static Tcl_Interp* pTestCollateInterp; +static int test_collate_func( + void *pCtx, + int nA, const void *zA, + int nB, const void *zB +){ + Tcl_Interp *i = pTestCollateInterp; + int encin = (int)pCtx; + int res; + + sqlite3_value *pVal; + Tcl_Obj *pX; + + pX = Tcl_NewStringObj("test_collate", -1); + Tcl_IncrRefCount(pX); + + switch( encin ){ + case SQLITE_UTF8: + Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-8",-1)); + break; + case SQLITE_UTF16LE: + Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-16LE",-1)); + break; + case SQLITE_UTF16BE: + Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-16BE",-1)); + break; + default: + assert(0); + } + + pVal = sqlite3ValueNew(); + sqlite3ValueSetStr(pVal, nA, zA, encin); + Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1)); + sqlite3ValueSetStr(pVal, nB, zB, encin); + Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1)); + sqlite3ValueFree(pVal); + + Tcl_EvalObjEx(i, pX, 0); + Tcl_DecrRefCount(pX); + Tcl_GetIntFromObj(i, Tcl_GetObjResult(i), &res); + return res; +} +static int test_collate( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + sqlite3 *db; + int val; + + if( objc!=5 ) goto bad_args; + pTestCollateInterp = interp; + if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR; + + if( TCL_OK!=Tcl_GetBooleanFromObj(interp, objv[2], &val) ) return TCL_ERROR; + sqlite3_create_collation(db, "test_collate", SQLITE_UTF8, + (void *)SQLITE_UTF8, val?test_collate_func:0); + if( TCL_OK!=Tcl_GetBooleanFromObj(interp, objv[3], &val) ) return TCL_ERROR; + sqlite3_create_collation(db, "test_collate", SQLITE_UTF16LE, + (void *)SQLITE_UTF16LE, val?test_collate_func:0); + if( TCL_OK!=Tcl_GetBooleanFromObj(interp, objv[4], &val) ) return TCL_ERROR; + sqlite3_create_collation(db, "test_collate", SQLITE_UTF16BE, + (void *)SQLITE_UTF16BE, val?test_collate_func:0); + + return TCL_OK; + +bad_args: + Tcl_AppendResult(interp, "wrong # args: should be \"", + Tcl_GetStringFromObj(objv[0], 0), " ", 0); + return TCL_ERROR; +} + /* ** Usage: breakpoint ** @@ -1868,6 +1970,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp){ { "sqlite3OsClose", test_sqlite3OsClose, 0 }, { "sqlite3OsLock", test_sqlite3OsLock, 0 }, { "sqlite3OsUnlock", test_sqlite3OsUnlock, 0 }, + { "add_test_collate", test_collate, 0 }, }; int i; diff --git a/src/vdbeapi.c b/src/vdbeapi.c index ac7d976bbf..ea69e3e679 100644 --- a/src/vdbeapi.c +++ b/src/vdbeapi.c @@ -58,40 +58,10 @@ long long int sqlite3_value_int64(sqlite3_value *pVal){ return pVal->i; } const unsigned char *sqlite3_value_text(sqlite3_value *pVal){ - if( pVal->flags&MEM_Null ){ - /* For a NULL return a NULL Pointer */ - return 0; - } - - if( pVal->flags&MEM_Str ){ - /* If there is already a string representation, make sure it is in - ** encoded in UTF-8. - */ - sqlite3VdbeChangeEncoding(pVal, TEXT_Utf8); - }else if( !(pVal->flags&MEM_Blob) ){ - /* Otherwise, unless this is a blob, convert it to a UTF-8 string */ - sqlite3VdbeMemStringify(pVal, TEXT_Utf8); - } - - return pVal->z; + return (const char *)sqlite3ValueText(pVal, TEXT_Utf8); } const void *sqlite3_value_text16(sqlite3_value* pVal){ - if( pVal->flags&MEM_Null ){ - /* For a NULL return a NULL Pointer */ - return 0; - } - - if( pVal->flags&MEM_Str ){ - /* If there is already a string representation, make sure it is in - ** encoded in UTF-16 machine byte order. - */ - sqlite3VdbeChangeEncoding(pVal, TEXT_Utf16); - }else if( !(pVal->flags&MEM_Blob) ){ - /* Otherwise, unless this is a blob, convert it to a UTF-16 string */ - sqlite3VdbeMemStringify(pVal, TEXT_Utf16); - } - - return (const void *)(pVal->z); + return sqlite3ValueText(pVal, TEXT_Utf16); } int sqlite3_value_type(sqlite3_value* pVal){ return pVal->type; diff --git a/src/vdbemem.c b/src/vdbemem.c index bfe2cf52bb..b69dfb6ad4 100644 --- a/src/vdbemem.c +++ b/src/vdbemem.c @@ -47,12 +47,15 @@ int sqlite3VdbeChangeEncoding(Mem *pMem, int desiredEnc){ */ char *z; int n; - int rc = sqlite3utfTranslate(pMem->z, pMem->n, pMem->enc, - (void **)&z, &n, desiredEnc); + int rc; + + rc = sqlite3utfTranslate(pMem->z, pMem->n, pMem->enc, &z, &n, desiredEnc); if( rc!=SQLITE_OK ){ return rc; } - + if( pMem->flags&MEM_Dyn ){ + sqliteFree(pMem->z); + } /* Result of sqlite3utfTranslate is currently always dynamically ** allocated and nul terminated. This might be altered as a performance ** enhancement later. @@ -444,38 +447,20 @@ int sqlite3MemCompare(const Mem *pMem1, const Mem *pMem2, const CollSeq *pColl){ if( pMem1->enc==pColl->enc ){ return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z); }else{ - switch( pColl->enc ){ - case SQLITE_UTF8: - return pColl->xCmp( - pColl->pUser, - sqlite3_value_bytes((sqlite3_value *)pMem1), - sqlite3_value_text((sqlite3_value *)pMem1), - sqlite3_value_bytes((sqlite3_value *)pMem2), - sqlite3_value_text((sqlite3_value *)pMem2) - ); - case SQLITE_UTF16LE: - case SQLITE_UTF16BE: - /* FIX ME: Handle non-native UTF-16 properly instead of - ** assuming it is always native. */ - return pColl->xCmp( - pColl->pUser, - sqlite3_value_bytes16((sqlite3_value *)pMem1), - sqlite3_value_text16((sqlite3_value *)pMem1), - sqlite3_value_bytes16((sqlite3_value *)pMem2), - sqlite3_value_text16((sqlite3_value *)pMem2) - ); - default: - assert(!"Cannot happen"); - } + return pColl->xCmp( + pColl->pUser, + sqlite3ValueBytes((sqlite3_value*)pMem1, pColl->enc), + sqlite3ValueText((sqlite3_value*)pMem1, pColl->enc), + sqlite3ValueBytes((sqlite3_value*)pMem2, pColl->enc), + sqlite3ValueText((sqlite3_value*)pMem2, pColl->enc) + ); } } /* If a NULL pointer was passed as the collate function, fall through - ** to the blob case and use memcmp(). - */ + ** to the blob case and use memcmp(). */ } - /* Both values must be blobs. Compare using memcmp(). - */ + /* Both values must be blobs. Compare using memcmp(). */ rc = memcmp(pMem1->z, pMem2->z, (pMem1->n>pMem2->n)?pMem2->n:pMem1->n); if( rc==0 ){ rc = pMem1->n - pMem2->n; @@ -588,3 +573,72 @@ void sqlite3VdbeMemSanity(Mem *pMem, u8 db_enc){ || (pMem->flags&MEM_Null)==0 ); } #endif + +/* This function is only available internally, it is not part of the +** external API. It works in a similar way to sqlite3_value_text(), +** except the data returned is in the encoding specified by the second +** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or +** SQLITE_UTF8. +*/ +const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){ + assert( enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE || enc==SQLITE_UTF8); + if( pVal->flags&MEM_Null ){ + /* For a NULL return a NULL Pointer */ + return 0; + } + + if( pVal->flags&MEM_Str ){ + /* If there is already a string representation, make sure it is in + ** encoded in the required UTF-16 byte order. + */ + sqlite3VdbeChangeEncoding(pVal, enc); + }else if( !(pVal->flags&MEM_Blob) ){ + /* Otherwise, unless this is a blob, convert it to a UTF-16 string */ + sqlite3VdbeMemStringify(pVal, enc); + } + + return (const void *)(pVal->z); +} + +sqlite3_value* sqlite3ValueNew(){ + Mem *p = sqliteMalloc(sizeof(*p)); + if( p ){ + p->flags = MEM_Null; + p->type = SQLITE_NULL; + } + return p; +} + +void sqlite3ValueSetStr(sqlite3_value *v, int n, const void *z, u8 enc){ + Mem *p = (Mem *)v; + if( p->z && p->flags&MEM_Dyn ){ + sqliteFree(p->z); + } + p->z = (char *)z; + p->n = n; + p->enc = enc; + p->type = SQLITE_TEXT; + p->flags = (MEM_Str|MEM_Static); + + if( p->n<0 ){ + if( enc==SQLITE_UTF8 ){ + p->n = strlen(p->z); + }else{ + p->n = sqlite3utf16ByteLen(p->z, -1); + } + } + return; +} + +void sqlite3ValueFree(sqlite3_value *v){ + sqlite3ValueSetStr(v, 0, 0, SQLITE_UTF8); + sqliteFree(v); +} + +int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){ + Mem *p = (Mem*)pVal; + if( (p->flags & MEM_Blob)!=0 || sqlite3ValueText(pVal, enc) ){ + return p->n; + } + return 0; +} diff --git a/test/enc2.test b/test/enc2.test index e21234d98e..0dfc8bd3d5 100644 --- a/test/enc2.test +++ b/test/enc2.test @@ -13,7 +13,7 @@ # various suported unicode encodings (UTF-8, UTF-16, UTF-16le and # UTF-16be). # -# $Id: enc2.test,v 1.7 2004/06/10 05:59:25 danielk1977 Exp $ +# $Id: enc2.test,v 1.8 2004/06/10 14:01:08 danielk1977 Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl @@ -143,5 +143,112 @@ do_test enc2-4.3 { } {1 {attached databases must use the same text encoding as main database}} db2 close +db close + +# The following tests - enc2-5.* - test that SQLite selects the correct +# collation sequence when more than one is available. + +set ::values [list one two three four five] +set ::test_collate_enc INVALID +proc test_collate {enc lhs rhs} { + set ::test_collate_enc $enc + set l [lsearch -exact $::values $lhs] + set r [lsearch -exact $::values $rhs] + set res [expr $l - $r] + # puts "test_collate $enc $lhs $rhs -> $res" + return $res +} + +file delete -force test.db +set DB [sqlite db test.db] +do_test enc2-5.0 { + execsql { + CREATE TABLE t5(a); + INSERT INTO t5 VALUES('one'); + INSERT INTO t5 VALUES('two'); + INSERT INTO t5 VALUES('five'); + INSERT INTO t5 VALUES('three'); + INSERT INTO t5 VALUES('four'); + } +} {} +do_test enc2-5.1 { + add_test_collate $DB 1 1 1 + set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] + lappend res $::test_collate_enc +} {one two three four five UTF-8} +do_test enc2-5.2 { + add_test_collate $DB 0 1 0 + set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] + lappend res $::test_collate_enc +} {one two three four five UTF-16LE} +breakpoint +do_test enc2-5.3 { + add_test_collate $DB 0 0 1 + set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] + lappend res $::test_collate_enc +} {one two three four five UTF-16BE} + +file delete -force test.db +set DB [sqlite db test.db] +execsql {pragma encoding = 'UTF-16LE'} +do_test enc2-5.4 { + execsql { + CREATE TABLE t5(a); + INSERT INTO t5 VALUES('one'); + INSERT INTO t5 VALUES('two'); + INSERT INTO t5 VALUES('five'); + INSERT INTO t5 VALUES('three'); + INSERT INTO t5 VALUES('four'); + } +} {} +do_test enc2-5.5 { + add_test_collate $DB 1 1 1 + set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] + lappend res $::test_collate_enc +} {one two three four five UTF-16LE} +do_test enc2-5.6 { + add_test_collate $DB 1 0 1 + set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] + lappend res $::test_collate_enc +} {one two three four five UTF-16BE} +breakpoint +do_test enc2-5.7 { + add_test_collate $DB 1 0 0 + set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] + lappend res $::test_collate_enc +} {one two three four five UTF-8} + +file delete -force test.db +set DB [sqlite db test.db] +execsql {pragma encoding = 'UTF-16BE'} +do_test enc2-5.8 { + execsql { + CREATE TABLE t5(a); + INSERT INTO t5 VALUES('one'); + INSERT INTO t5 VALUES('two'); + INSERT INTO t5 VALUES('five'); + INSERT INTO t5 VALUES('three'); + INSERT INTO t5 VALUES('four'); + } +} {} +do_test enc2-5.9 { + add_test_collate $DB 1 1 1 + set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] + lappend res $::test_collate_enc +} {one two three four five UTF-16BE} +do_test enc2-5.10 { + add_test_collate $DB 1 1 0 + set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] + lappend res $::test_collate_enc +} {one two three four five UTF-16LE} +breakpoint +do_test enc2-5.11 { + add_test_collate $DB 1 0 0 + set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] + lappend res $::test_collate_enc +} {one two three four five UTF-8} finish_test + + +