diff --git a/VERSION b/VERSION index 8b54409523..4c24bf133a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.28 +1.0.29 diff --git a/manifest b/manifest index cb501af9a5..e1cd4bb2c8 100644 --- a/manifest +++ b/manifest @@ -1,9 +1,9 @@ -C :-)\s(CVS\s198) -D 2001-04-04T12:33:36 +C Added\ssupport\sfor\sUTF-8\s(CVS\s199) +D 2001-04-04T21:10:19 F COPYRIGHT 74a8a6531a42e124df07ab5599aad63870fa0bd4 F Makefile.in fd8815aa01a7181f60f786158b7737a35413189e F README 51f6a4e7408b34afa5bc1c0485f61b6a4efb6958 -F VERSION 010a68e4026cf015511e2c5acc54815fa374d11b +F VERSION fb0fbad3b7a52736cc18ea5fcf1bc6dba7b2c40c F configure 3dc1edb9dcf60215e31ff72b447935ab62211442 x F configure.in d892ca33db7e88a055519ce2f36dcb11020e8fff F doc/lemon.html e233a3e97a779c7a87e1bc4528c664a58e49dd47 @@ -34,12 +34,12 @@ F src/select.c a6bfdaa92d4614e79bf18129283c5163faa291fc F src/shell.c 441e20913cde0bb71281f4027623c623530241cd F src/shell.tcl 27ecbd63dd88396ad16d81ab44f73e6c0ea9d20e F src/sqlite.h.in 3b446fcbed6005f0ab89632f3356c4708b349e88 -F src/sqliteInt.h 7872fa85719adff8e458f4a27d56a0ea3e8a3dd1 +F src/sqliteInt.h 97e2dd488ab433e27eda6e26f4c84a9a2684785c F src/table.c 5be76051a8ed6f6bfa641f4adc52529efa34fbf9 F src/tclsqlite.c f654b0399ea8a29262637dbe71fdfe7c26bd9032 F src/tokenize.c 8fc3936eefad84f1fff19e0892ed0542eb9ac7b3 F src/update.c 8365b3922ea098330d1e20862d6e64911e4e03d0 -F src/util.c f4573201fc2b581dbf601c53787349310b7da150 +F src/util.c 16a7af31c23db4066b2cfdc200a4067bc13d80ab F src/vdbe.c 53de79aa212997a8615659d7a7e6eb12aa77255d F src/vdbe.h dc1205da434c6a9da03b5d6b089270bbc8e6d437 F src/where.c 459bf37ac7849599da400420984b3306484b4cbb @@ -47,7 +47,7 @@ F test/all.test 15cac2f6b2d4c55bf896212aff3cc9d6597b0490 F test/copy.test b77a1214bd7756f2849d5c4fa6e715c0ff0c34eb F test/dbbe.test a022fe2d983848f786e17ef1fc6809cfd37fb02c F test/delete.test 50b9b1f06c843d591741dba7869433a105360dbf -F test/expr.test 83b29f29f58df80d185d163b7fab5c658a1bd29a +F test/expr.test 278d7524079219f3bf9df41225903c9fb8c61c19 F test/func.test 02aed8845b98bde1043dda97455de1d37238ebb3 F test/in.test ea48016c4fcc479d315932ae2b8568146686ffaf F test/index.test b189ac11bf8d4fbcf87402f4028c25c8a6d91bb5 @@ -83,18 +83,18 @@ F www/arch.fig 4f246003b7da23bd63b8b0af0618afb4ee3055c8 F www/arch.png 8dae0766d42ed3de9ed013c1341a5792bcf633e6 F www/arch.tcl a40380c1fe0080c43e6cc5c20ed70731511b06be F www/c_interface.tcl 11be2d5826eb7d6efd629751d3b483c1ed78ba14 -F www/changes.tcl 2f8108b1c19f6b1428cd89aeb4da0f446af5a8b6 +F www/changes.tcl 9cea962625b87620cfbb2ecb0ed9a8a5e6b2cee3 F www/crosscompile.tcl c99efacb3aefaa550c6e80d91b240f55eb9fd33e F www/dynload.tcl 02eb8273aa78cfa9070dd4501dca937fb22b466c F www/fileformat.tcl cfb7fba80b7275555281ba2f256c00734bcdd1c9 -F www/index.tcl b499fca29ebbaff66bcafe87b7dd1b8b36fb5a51 +F www/index.tcl e6a1fb2adfa9a881d7bee0c86c2959d1a872e7bb F www/lang.tcl 7fec414487ebee2cbb17c90addf5a026cd10396a F www/mingw.tcl fc5f4ba9d336b6e8c97347cc6496d6162461ef60 F www/opcode.tcl cb3a1abf8b7b9be9f3a228d097d6bf8b742c2b6f F www/sqlite.tcl cb0d23d8f061a80543928755ec7775da6e4f362f F www/tclsqlite.tcl 06f81c401f79a04f2c5ebfb97e7c176225c0aef2 F www/vdbe.tcl 0c8aaa529dd216ccbf7daaabd80985e413d5f9ad -P ab645437447059be18018890bd33f3a977c4c8b3 -R 77bf85a68e27b2bffb4067f583abffa7 +P 24bede902722e8003451143b38284de48700c459 +R 65747f257df149c1ff5989dd8e504549 U drh -Z ad2ab6bf64d9a1be2ace4f74c839311d +Z 80c8925b23615f8a689b0be9c577c85a diff --git a/manifest.uuid b/manifest.uuid index adb757991d..e344f290a3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -24bede902722e8003451143b38284de48700c459 \ No newline at end of file +f0674697c90e4eed630c36e40e724de05d54f74f \ No newline at end of file diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 37f4a6b0b7..04de020795 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -23,7 +23,7 @@ ************************************************************************* ** Internal interface definitions for SQLite. ** -** @(#) $Id: sqliteInt.h,v 1.38 2001/04/04 11:48:58 drh Exp $ +** @(#) $Id: sqliteInt.h,v 1.39 2001/04/04 21:10:19 drh Exp $ */ #include "sqlite.h" #include "dbbe.h" @@ -414,7 +414,7 @@ void sqliteExprIfFalse(Parse*, Expr*, int); Table *sqliteFindTable(sqlite*,char*); void sqliteCopy(Parse*, Token*, Token*, Token*); void sqliteVacuum(Parse*, Token*); -int sqliteGlobCompare(const char*,const char*); +int sqliteGlobCompare(const unsigned char*,const unsigned char*); int sqliteLikeCompare(const unsigned char*,const unsigned char*); char *sqliteTableNameFromToken(Token*); int sqliteExprCheck(Parse*, Expr*, int, int*); diff --git a/src/util.c b/src/util.c index 05521d4ca3..9d45e1b4d0 100644 --- a/src/util.c +++ b/src/util.c @@ -26,7 +26,7 @@ ** This file contains functions for allocating memory, comparing ** strings, and stuff like that. ** -** $Id: util.c,v 1.18 2001/03/14 12:35:57 drh Exp $ +** $Id: util.c,v 1.19 2001/04/04 21:10:19 drh Exp $ */ #include "sqliteInt.h" #include @@ -278,7 +278,8 @@ void sqliteSetString(char **pz, const char *zFirst, ...){ /* ** Works like sqliteSetString, but each string is now followed by -** a length integer. -1 means use the whole string. +** a length integer which specifies how much of the source string +** to copy (in bytes). -1 means use the whole string. */ void sqliteSetNString(char **pz, ...){ va_list ap; @@ -725,7 +726,93 @@ int sqliteSortCompare(const char *a, const char *b){ } /* -** Compare two strings for equality where the first string can +** When the first byte of a UTF-8 character is used as the +** index of the following array, then the value is the number +** of bytes in the whole UTF-8 character. This matrix assumes +** a well-formed UTF-8 string. All bets are off if the input +** is not well-formed. +*/ +static const unsigned char utf8_width[] = { + /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ +/* 0x */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 1x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 2x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 3x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 4x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 6x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 8x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 9x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* Ax */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* Bx */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* Cx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* Dx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* Ex */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +/* Fx */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1, +}; + +/* +** This routine computes the number of bytes to the start of the +** next UTF-8 character. We could just do +** +** z += utf8_width[*z] +** +** accomplish the same thing, if we know that z was a well-formed +** UTF-8 string. If it is not, then z might be incremented past +** its null terminator. This function, though slower, will never +** increment z past its terminator. +*/ +static int utf8_char_size(const unsigned char *z){ + int i, n = utf8_width[*z]; + for(i=1; i 0 ){ + c = (c<<6) | (0x3f & *(z++)); + } + return c; +} + +/* +** Compare two UTF-8 strings for equality where the first string can ** potentially be a "glob" expression. Return true (1) if they ** are the same and false (0) if they are different. ** @@ -752,21 +839,28 @@ int sqliteSortCompare(const char *a, const char *b){ ** ** abc[*]xyz Matches "abc*xyz" only */ -int sqliteGlobCompare(const char *zPattern, const char *zString){ - register char c; +int +sqliteGlobCompare(const unsigned char *zPattern, const unsigned char *zString){ + register int c; int invert; int seen; - char c2; + int c2; while( (c = *zPattern)!=0 ){ switch( c ){ case '*': - while( zPattern[1]=='*' ) zPattern++; - if( zPattern[1]==0 ) return 1; - c = zPattern[1]; - if( c=='[' || c=='?' ){ + while( (c=zPattern[1]) == '*' || c == '?' ){ + if( c=='?' ){ + if( *zString==0 ) return 0; + zString += utf8_char_size(zString); + } + zPattern++; + } + if( c==0 ) return 1; + c = UpperToLower[c]; + if( c=='[' ){ while( *zString && sqliteGlobCompare(&zPattern[1],zString)==0 ){ - zString++; + zString += utf8_char_size(zString); } return *zString!=0; }else{ @@ -774,17 +868,21 @@ int sqliteGlobCompare(const char *zPattern, const char *zString){ while( c2 != 0 && c2 != c ){ c2 = *++zString; } if( c2==0 ) return 0; if( sqliteGlobCompare(&zPattern[1],zString) ) return 1; - zString++; + zString += utf8_char_size(zString); } return 0; } - case '?': + case '?': { if( *zString==0 ) return 0; + zString += utf8_char_size(zString); + zPattern++; break; - case '[': + } + case '[': { + int prior_c = 0; seen = 0; invert = 0; - c = *zString; + c = utf8_to_int(zString); if( c==0 ) return 0; c2 = *++zPattern; if( c2=='^' ){ invert = 1; c2 = *++zPattern; } @@ -792,28 +890,38 @@ int sqliteGlobCompare(const char *zPattern, const char *zString){ if( c==']' ) seen = 1; c2 = *++zPattern; } - while( (c2 = *zPattern)!=0 && c2!=']' ){ - if( c2=='-' && zPattern[1]!=']' && zPattern[1]!=0 ){ - if( c>zPattern[-1] && c0 ){ + zPattern++; + c2 = utf8_to_int(zPattern); + if( c>=prior_c && c<=c2 ) seen = 1; + prior_c = 0; }else if( c==c2 ){ seen = 1; + prior_c = c2; + }else{ + prior_c = c2; } - zPattern++; + zPattern += utf8_char_size(zPattern); } if( c2==0 || (seen ^ invert)==0 ) return 0; + zString += utf8_char_size(zString); + zPattern++; break; - default: + } + default: { if( c != *zString ) return 0; + zPattern++; + zString++; break; + } } - zPattern++; - zString++; } return *zString==0; } /* -** Compare two strings for equality using the "LIKE" operator of +** Compare two UTF-8 strings for equality using the "LIKE" operator of ** SQL. The '%' character matches any sequence of 0 or more ** characters and '_' matches any single character. Case is ** not significant. @@ -823,38 +931,42 @@ int sqliteGlobCompare(const char *zPattern, const char *zString){ */ int sqliteLikeCompare(const unsigned char *zPattern, const unsigned char *zString){ - register char c; - char c2; + register int c; + int c2; while( (c = UpperToLower[*zPattern])!=0 ){ switch( c ){ - case '%': - while( zPattern[1]=='%' ) zPattern++; - if( zPattern[1]==0 ) return 1; - c = UpperToLower[0xff & zPattern[1]]; - if( c=='_' ){ - while( *zString && sqliteLikeCompare(&zPattern[1],zString)==0 ){ - zString++; + case '%': { + while( (c=zPattern[1]) == '%' || c == '_' ){ + if( c=='_' ){ + if( *zString==0 ) return 0; + zString += utf8_char_size(zString); } - return *zString!=0; - }else{ - while( (c2 = UpperToLower[*zString])!=0 ){ - while( c2 != 0 && c2 != c ){ c2 = UpperToLower[*++zString]; } - if( c2==0 ) return 0; - if( sqliteLikeCompare(&zPattern[1],zString) ) return 1; - zString++; - } - return 0; + zPattern++; } - case '_': + if( c==0 ) return 1; + c = UpperToLower[c]; + while( (c2=UpperToLower[*zString])!=0 ){ + while( c2 != 0 && c2 != c ){ c2 = UpperToLower[*++zString]; } + if( c2==0 ) return 0; + if( sqliteLikeCompare(&zPattern[1],zString) ) return 1; + zString += utf8_char_size(zString); + } + return 0; + } + case '_': { if( *zString==0 ) return 0; + zString += utf8_char_size(zString); + zPattern++; break; - default: + } + default: { if( c != UpperToLower[*zString] ) return 0; + zPattern++; + zString++; break; + } } - zPattern++; - zString++; } return *zString==0; } diff --git a/test/expr.test b/test/expr.test index f75162e2c4..76a4706bfe 100644 --- a/test/expr.test +++ b/test/expr.test @@ -23,7 +23,7 @@ # This file implements regression tests for SQLite library. The # focus of this file is testing expressions. # -# $Id: expr.test,v 1.10 2001/04/04 11:48:58 drh Exp $ +# $Id: expr.test,v 1.11 2001/04/04 21:10:19 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl @@ -159,6 +159,22 @@ test_expr expr-5.10 {t1='abxyzzyc', t2='A%_C'} {t1 LIKE t2} 1 test_expr expr-5.11 {t1='abc', t2='xyz'} {t1 NOT LIKE t2} 1 test_expr expr-5.12 {t1='abc', t2='ABC'} {t1 NOT LIKE t2} 0 +# The following tests only work on versions of TCL that support +# Unicode. +# +test_expr expr-5.13 "t1='a\u0080c', t2='A_C'" {t1 LIKE t2} 1 +test_expr expr-5.14 "t1='a\u07FFc', t2='A_C'" {t1 LIKE t2} 1 +test_expr expr-5.15 "t1='a\u0800c', t2='A_C'" {t1 LIKE t2} 1 +test_expr expr-5.16 "t1='a\uFFFFc', t2='A_C'" {t1 LIKE t2} 1 +test_expr expr-5.17 "t1='a\u0080', t2='A__'" {t1 LIKE t2} 0 +test_expr expr-5.18 "t1='a\u07FF', t2='A__'" {t1 LIKE t2} 0 +test_expr expr-5.19 "t1='a\u0800', t2='A__'" {t1 LIKE t2} 0 +test_expr expr-5.20 "t1='a\uFFFF', t2='A__'" {t1 LIKE t2} 0 +test_expr expr-5.21 "t1='ax\uABCD', t2='A_\uABCD'" {t1 LIKE t2} 1 +test_expr expr-5.22 "t1='ax\u1234', t2='A%\u1234'" {t1 LIKE t2} 1 +test_expr expr-5.23 "t1='ax\uFEDC', t2='A_%'" {t1 LIKE t2} 1 +test_expr expr-5.24 "t1='ax\uFEDCy\uFEDC', t2='A%\uFEDC'" {t1 LIKE t2} 1 + test_expr expr-6.1 {t1='abc', t2='xyz'} {t1 GLOB t2} 0 test_expr expr-6.2 {t1='abc', t2='ABC'} {t1 GLOB t2} 0 test_expr expr-6.3 {t1='abc', t2='A?C'} {t1 GLOB t2} 0 @@ -185,6 +201,26 @@ test_expr expr-6.23 {t1='abcdefg', t2='a*?g'} {t1 GLOB t2} 1 test_expr expr-6.24 {t1='ac', t2='a*c'} {t1 GLOB t2} 1 test_expr expr-6.25 {t1='ac', t2='a*?c'} {t1 GLOB t2} 0 +# These tests only work on versions of TCL that support Unicode +# +test_expr expr-6.26 "t1='a\u0080c', t2='a?c'" {t1 GLOB t2} 1 +test_expr expr-6.27 "t1='a\u07ffc', t2='a?c'" {t1 GLOB t2} 1 +test_expr expr-6.28 "t1='a\u0800c', t2='a?c'" {t1 GLOB t2} 1 +test_expr expr-6.29 "t1='a\uffffc', t2='a?c'" {t1 GLOB t2} 1 +test_expr expr-6.30 "t1='a\u1234', t2='a?'" {t1 GLOB t2} 1 +test_expr expr-6.31 "t1='a\u1234', t2='a??'" {t1 GLOB t2} 0 +test_expr expr-6.32 "t1='ax\u1234', t2='a?\u1234'" {t1 GLOB t2} 1 +test_expr expr-6.33 "t1='ax\u1234', t2='a*\u1234'" {t1 GLOB t2} 1 +test_expr expr-6.34 "t1='ax\u1234y\u1234', t2='a*\u1234'" {t1 GLOB t2} 1 +test_expr expr-6.35 "t1='a\u1234b', t2='a\[x\u1234y\]b'" {t1 GLOB t2} 1 +test_expr expr-6.36 "t1='a\u1234b', t2='a\[\u1233-\u1235\]b'" {t1 GLOB t2} 1 +test_expr expr-6.37 "t1='a\u1234b', t2='a\[\u1234-\u124f\]b'" {t1 GLOB t2} 1 +test_expr expr-6.38 "t1='a\u1234b', t2='a\[\u1235-\u124f\]b'" {t1 GLOB t2} 0 +test_expr expr-6.39 "t1='a\u1234b', t2='a\[a-\u1235\]b'" {t1 GLOB t2} 1 +test_expr expr-6.40 "t1='a\u1234b', t2='a\[a-\u1234\]b'" {t1 GLOB t2} 1 +test_expr expr-6.41 "t1='a\u1234b', t2='a\[a-\u1233\]b'" {t1 GLOB t2} 0 + + # The sqliteExprIfFalse and sqliteExprIfTrue routines are only # executed as part of a WHERE clause. Create a table suitable # for testing these functions. diff --git a/www/changes.tcl b/www/changes.tcl index 9ca8882b8c..6298f333d1 100644 --- a/www/changes.tcl +++ b/www/changes.tcl @@ -17,6 +17,13 @@ proc chng {date desc} { puts "

    $desc

" } +chng {2001 Apr 5 (1.0.29)} { +
  • The LIKE and GLOB operators now assume both operands are + UTF-8 strings. + ** This change could potentially + break existing code **
  • +} + chng {2001 Apr 4 (1.0.28)} {
  • Added limited support for transactions. At this point, transactions will do table locking on the GDBM backend. There is no support (yet) diff --git a/www/index.tcl b/www/index.tcl index 2e6bc45e32..e0aa19fc53 100644 --- a/www/index.tcl +++ b/www/index.tcl @@ -1,7 +1,7 @@ # # Run this TCL script to generate HTML for the index.html file. # -set rcsid {$Id: index.tcl,v 1.33 2001/04/04 12:33:36 drh Exp $} +set rcsid {$Id: index.tcl,v 1.34 2001/04/04 21:10:19 drh Exp $} puts { SQLite: An SQL Database Library Built Atop GDBM @@ -46,7 +46,8 @@ included.
  • the GNU Readline library
  • A Tcl-based test suite provides near 100% code coverage
  • -
  • 7500+ lines of C code. No external dependencies other than GDBM.
  • +
  • Approximately 9500 lines of C code. No external dependencies other +than GDBM.
  • Built and tested under Linux, HPUX, and WinNT.
  • @@ -61,20 +62,16 @@ all code except for a few areas which are unreachable or which are only reached when malloc() fails. The code has been tested for memory leaks and is found to be clean.

    -

    Important Note: A bug was found in the processing of UPDATE -statements when the WHERE clause contained some terms that could be -satisfied using indices and other terms which could not. The problem -was fixed in version 1.0.22. Users of prior versions of SQLite should -consider upgrading.

    +

    Important Note: Beginning with version 1.0.29, the LIKE and +GLOB operators assume both operands are UTF-8 strings. Prior to that, +both operators assumed plain ASCII strings. Users of earlier versions +of SQLite that invoke LIKE or GLOB to compare strings containing +characters greater than 127 may have problems when they upgrade to +version 1.0.29 or later.

    -

    -Among the SQL features that SQLite does not currently implement are:

    - -

    -

      -
    • constraints are parsed but are not enforced
    • -
    -

    +

    Important Note: Serious bugs have been found in versions +1.0.22 on Unix and 1.0.26 on Windows. Users of these or earlier +versions of SQLite should upgrade.

    Documentation