1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Added support for UTF-8 (CVS 199)

FossilOrigin-Name: f0674697c90e4eed630c36e40e724de05d54f74f
This commit is contained in:
drh
2001-04-04 21:10:18 +00:00
parent 0012761159
commit e17a7e330b
8 changed files with 228 additions and 76 deletions

View File

@ -1 +1 @@
1.0.28
1.0.29

View File

@ -1,9 +1,9 @@
C :-)\s(CVS\s198)
D 2001-04-04T12:33:36
C Added\ssupport\sfor\sUTF-8\s(CVS\s199)
D 2001-04-04T21:10:19
F COPYRIGHT 74a8a6531a42e124df07ab5599aad63870fa0bd4
F Makefile.in fd8815aa01a7181f60f786158b7737a35413189e
F README 51f6a4e7408b34afa5bc1c0485f61b6a4efb6958
F VERSION 010a68e4026cf015511e2c5acc54815fa374d11b
F VERSION fb0fbad3b7a52736cc18ea5fcf1bc6dba7b2c40c
F configure 3dc1edb9dcf60215e31ff72b447935ab62211442 x
F configure.in d892ca33db7e88a055519ce2f36dcb11020e8fff
F doc/lemon.html e233a3e97a779c7a87e1bc4528c664a58e49dd47
@ -34,12 +34,12 @@ F src/select.c a6bfdaa92d4614e79bf18129283c5163faa291fc
F src/shell.c 441e20913cde0bb71281f4027623c623530241cd
F src/shell.tcl 27ecbd63dd88396ad16d81ab44f73e6c0ea9d20e
F src/sqlite.h.in 3b446fcbed6005f0ab89632f3356c4708b349e88
F src/sqliteInt.h 7872fa85719adff8e458f4a27d56a0ea3e8a3dd1
F src/sqliteInt.h 97e2dd488ab433e27eda6e26f4c84a9a2684785c
F src/table.c 5be76051a8ed6f6bfa641f4adc52529efa34fbf9
F src/tclsqlite.c f654b0399ea8a29262637dbe71fdfe7c26bd9032
F src/tokenize.c 8fc3936eefad84f1fff19e0892ed0542eb9ac7b3
F src/update.c 8365b3922ea098330d1e20862d6e64911e4e03d0
F src/util.c f4573201fc2b581dbf601c53787349310b7da150
F src/util.c 16a7af31c23db4066b2cfdc200a4067bc13d80ab
F src/vdbe.c 53de79aa212997a8615659d7a7e6eb12aa77255d
F src/vdbe.h dc1205da434c6a9da03b5d6b089270bbc8e6d437
F src/where.c 459bf37ac7849599da400420984b3306484b4cbb
@ -47,7 +47,7 @@ F test/all.test 15cac2f6b2d4c55bf896212aff3cc9d6597b0490
F test/copy.test b77a1214bd7756f2849d5c4fa6e715c0ff0c34eb
F test/dbbe.test a022fe2d983848f786e17ef1fc6809cfd37fb02c
F test/delete.test 50b9b1f06c843d591741dba7869433a105360dbf
F test/expr.test 83b29f29f58df80d185d163b7fab5c658a1bd29a
F test/expr.test 278d7524079219f3bf9df41225903c9fb8c61c19
F test/func.test 02aed8845b98bde1043dda97455de1d37238ebb3
F test/in.test ea48016c4fcc479d315932ae2b8568146686ffaf
F test/index.test b189ac11bf8d4fbcf87402f4028c25c8a6d91bb5
@ -83,18 +83,18 @@ F www/arch.fig 4f246003b7da23bd63b8b0af0618afb4ee3055c8
F www/arch.png 8dae0766d42ed3de9ed013c1341a5792bcf633e6
F www/arch.tcl a40380c1fe0080c43e6cc5c20ed70731511b06be
F www/c_interface.tcl 11be2d5826eb7d6efd629751d3b483c1ed78ba14
F www/changes.tcl 2f8108b1c19f6b1428cd89aeb4da0f446af5a8b6
F www/changes.tcl 9cea962625b87620cfbb2ecb0ed9a8a5e6b2cee3
F www/crosscompile.tcl c99efacb3aefaa550c6e80d91b240f55eb9fd33e
F www/dynload.tcl 02eb8273aa78cfa9070dd4501dca937fb22b466c
F www/fileformat.tcl cfb7fba80b7275555281ba2f256c00734bcdd1c9
F www/index.tcl b499fca29ebbaff66bcafe87b7dd1b8b36fb5a51
F www/index.tcl e6a1fb2adfa9a881d7bee0c86c2959d1a872e7bb
F www/lang.tcl 7fec414487ebee2cbb17c90addf5a026cd10396a
F www/mingw.tcl fc5f4ba9d336b6e8c97347cc6496d6162461ef60
F www/opcode.tcl cb3a1abf8b7b9be9f3a228d097d6bf8b742c2b6f
F www/sqlite.tcl cb0d23d8f061a80543928755ec7775da6e4f362f
F www/tclsqlite.tcl 06f81c401f79a04f2c5ebfb97e7c176225c0aef2
F www/vdbe.tcl 0c8aaa529dd216ccbf7daaabd80985e413d5f9ad
P ab645437447059be18018890bd33f3a977c4c8b3
R 77bf85a68e27b2bffb4067f583abffa7
P 24bede902722e8003451143b38284de48700c459
R 65747f257df149c1ff5989dd8e504549
U drh
Z ad2ab6bf64d9a1be2ace4f74c839311d
Z 80c8925b23615f8a689b0be9c577c85a

View File

@ -1 +1 @@
24bede902722e8003451143b38284de48700c459
f0674697c90e4eed630c36e40e724de05d54f74f

View File

@ -23,7 +23,7 @@
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.38 2001/04/04 11:48:58 drh Exp $
** @(#) $Id: sqliteInt.h,v 1.39 2001/04/04 21:10:19 drh Exp $
*/
#include "sqlite.h"
#include "dbbe.h"
@ -414,7 +414,7 @@ void sqliteExprIfFalse(Parse*, Expr*, int);
Table *sqliteFindTable(sqlite*,char*);
void sqliteCopy(Parse*, Token*, Token*, Token*);
void sqliteVacuum(Parse*, Token*);
int sqliteGlobCompare(const char*,const char*);
int sqliteGlobCompare(const unsigned char*,const unsigned char*);
int sqliteLikeCompare(const unsigned char*,const unsigned char*);
char *sqliteTableNameFromToken(Token*);
int sqliteExprCheck(Parse*, Expr*, int, int*);

View File

@ -26,7 +26,7 @@
** This file contains functions for allocating memory, comparing
** strings, and stuff like that.
**
** $Id: util.c,v 1.18 2001/03/14 12:35:57 drh Exp $
** $Id: util.c,v 1.19 2001/04/04 21:10:19 drh Exp $
*/
#include "sqliteInt.h"
#include <stdarg.h>
@ -278,7 +278,8 @@ void sqliteSetString(char **pz, const char *zFirst, ...){
/*
** Works like sqliteSetString, but each string is now followed by
** a length integer. -1 means use the whole string.
** a length integer which specifies how much of the source string
** to copy (in bytes). -1 means use the whole string.
*/
void sqliteSetNString(char **pz, ...){
va_list ap;
@ -725,7 +726,93 @@ int sqliteSortCompare(const char *a, const char *b){
}
/*
** Compare two strings for equality where the first string can
** When the first byte of a UTF-8 character is used as the
** index of the following array, then the value is the number
** of bytes in the whole UTF-8 character. This matrix assumes
** a well-formed UTF-8 string. All bets are off if the input
** is not well-formed.
*/
static const unsigned char utf8_width[] = {
  /* Look up the first byte of a character to get the total number of
  ** bytes in that character.  One row per group of sixteen lead bytes.
  ** Only the 0x00 terminator has width 0. */
  /* 00..0F */  0,1,1,1,  1,1,1,1,  1,1,1,1,  1,1,1,1,
  /* 10..1F */  1,1,1,1,  1,1,1,1,  1,1,1,1,  1,1,1,1,
  /* 20..2F */  1,1,1,1,  1,1,1,1,  1,1,1,1,  1,1,1,1,
  /* 30..3F */  1,1,1,1,  1,1,1,1,  1,1,1,1,  1,1,1,1,
  /* 40..4F */  1,1,1,1,  1,1,1,1,  1,1,1,1,  1,1,1,1,
  /* 50..5F */  1,1,1,1,  1,1,1,1,  1,1,1,1,  1,1,1,1,
  /* 60..6F */  1,1,1,1,  1,1,1,1,  1,1,1,1,  1,1,1,1,
  /* 70..7F */  1,1,1,1,  1,1,1,1,  1,1,1,1,  1,1,1,1,
  /* 80..8F */  1,1,1,1,  1,1,1,1,  1,1,1,1,  1,1,1,1,
  /* 90..9F */  1,1,1,1,  1,1,1,1,  1,1,1,1,  1,1,1,1,
  /* A0..AF */  1,1,1,1,  1,1,1,1,  1,1,1,1,  1,1,1,1,
  /* B0..BF */  1,1,1,1,  1,1,1,1,  1,1,1,1,  1,1,1,1,
  /* C0..CF */  2,2,2,2,  2,2,2,2,  2,2,2,2,  2,2,2,2,
  /* D0..DF */  2,2,2,2,  2,2,2,2,  2,2,2,2,  2,2,2,2,
  /* E0..EF */  3,3,3,3,  3,3,3,3,  3,3,3,3,  3,3,3,3,
  /* F0..FF */  4,4,4,4,  4,4,4,4,  5,5,5,5,  6,6,1,1,
};
/*
** This routine computes the number of bytes to the start of the
** next UTF-8 character.  We could just do
**
**     z += utf8_width[*z]
**
** to accomplish the same thing, if we knew that z was a well-formed
** UTF-8 string.  If it is not, a truncated multi-byte character could
** cause z to be incremented past its null terminator.  This function,
** though slower, stops counting at the first 0x00 byte and so will
** never increment z past its terminator.
**
** If z already points at the terminator the return value is 0, so
** that "z += utf8_char_size(z)" leaves z where it is.
*/
static int utf8_char_size(const unsigned char *z){
  int n = utf8_width[*z];  /* Width promised by the lead byte */
  int i;
  if( n==0 ) return 0;     /* Only 0x00 has width 0: do not step over it */
  for(i=1; i<n && z[i]!=0; i++){}
  return i;
}
/*
** Convert the UTF-8 character pointed to by the input parameter
** into a 31-bit UCS character and return an integer holding the
** 31-bit UCS character.
**
** The first byte determines, via utf8_width[], how many bytes make
** up the character.  A 0x00 byte yields 0 and any other single-byte
** value is returned unchanged.  For a multi-byte character the payload
** bits of the first byte are followed by the low six bits of each
** subsequent byte.
**
** Decoding stops early if a 0x00 byte is found where a continuation
** byte was expected, so a truncated character at the end of the string
** never causes a read beyond the null terminator.  In that case the
** bits gathered so far are returned.
*/
static int utf8_to_int(const unsigned char *z){
  int n = utf8_width[*z];
  int c;
  if( n<=1 ){
    /* n==0 only for the terminator, for which *z is already 0 */
    return *z;
  }
  /* A lead byte of an n-byte character carries 7-n payload bits:
  ** n=2 -> 0x1f, n=3 -> 0x0f, n=4 -> 0x07, n=5 -> 0x03, n=6 -> 0x01 */
  c = *(z++) & (0xff >> (n+1));
  while( (--n)>0 && *z!=0 ){
    c = (c<<6) | (0x3f & *(z++));
  }
  return c;
}
/*
** Compare two UTF-8 strings for equality where the first string can
** potentially be a "glob" expression. Return true (1) if they
** are the same and false (0) if they are different.
**
@ -752,21 +839,28 @@ int sqliteSortCompare(const char *a, const char *b){
**
** abc[*]xyz Matches "abc*xyz" only
*/
int sqliteGlobCompare(const char *zPattern, const char *zString){
register char c;
int
sqliteGlobCompare(const unsigned char *zPattern, const unsigned char *zString){
register int c;
int invert;
int seen;
char c2;
int c2;
while( (c = *zPattern)!=0 ){
switch( c ){
case '*':
while( zPattern[1]=='*' ) zPattern++;
if( zPattern[1]==0 ) return 1;
c = zPattern[1];
if( c=='[' || c=='?' ){
while( (c=zPattern[1]) == '*' || c == '?' ){
if( c=='?' ){
if( *zString==0 ) return 0;
zString += utf8_char_size(zString);
}
zPattern++;
}
if( c==0 ) return 1;
c = UpperToLower[c];
if( c=='[' ){
while( *zString && sqliteGlobCompare(&zPattern[1],zString)==0 ){
zString++;
zString += utf8_char_size(zString);
}
return *zString!=0;
}else{
@ -774,17 +868,21 @@ int sqliteGlobCompare(const char *zPattern, const char *zString){
while( c2 != 0 && c2 != c ){ c2 = *++zString; }
if( c2==0 ) return 0;
if( sqliteGlobCompare(&zPattern[1],zString) ) return 1;
zString++;
zString += utf8_char_size(zString);
}
return 0;
}
case '?':
case '?': {
if( *zString==0 ) return 0;
zString += utf8_char_size(zString);
zPattern++;
break;
case '[':
}
case '[': {
int prior_c = 0;
seen = 0;
invert = 0;
c = *zString;
c = utf8_to_int(zString);
if( c==0 ) return 0;
c2 = *++zPattern;
if( c2=='^' ){ invert = 1; c2 = *++zPattern; }
@ -792,28 +890,38 @@ int sqliteGlobCompare(const char *zPattern, const char *zString){
if( c==']' ) seen = 1;
c2 = *++zPattern;
}
while( (c2 = *zPattern)!=0 && c2!=']' ){
if( c2=='-' && zPattern[1]!=']' && zPattern[1]!=0 ){
if( c>zPattern[-1] && c<zPattern[1] ) seen = 1;
while( (c2 = utf8_to_int(zPattern))!=0 && c2!=']' ){
if( c2=='-' && zPattern[1]!=']' && zPattern[1]!=0 && prior_c>0 ){
zPattern++;
c2 = utf8_to_int(zPattern);
if( c>=prior_c && c<=c2 ) seen = 1;
prior_c = 0;
}else if( c==c2 ){
seen = 1;
prior_c = c2;
}else{
prior_c = c2;
}
zPattern++;
zPattern += utf8_char_size(zPattern);
}
if( c2==0 || (seen ^ invert)==0 ) return 0;
break;
default:
if( c != *zString ) return 0;
zString += utf8_char_size(zString);
zPattern++;
break;
}
default: {
if( c != *zString ) return 0;
zPattern++;
zString++;
break;
}
}
}
return *zString==0;
}
/*
** Compare two strings for equality using the "LIKE" operator of
** Compare two UTF-8 strings for equality using the "LIKE" operator of
** SQL. The '%' character matches any sequence of 0 or more
** characters and '_' matches any single character. Case is
** not significant.
@ -823,38 +931,42 @@ int sqliteGlobCompare(const char *zPattern, const char *zString){
*/
int
sqliteLikeCompare(const unsigned char *zPattern, const unsigned char *zString){
register char c;
char c2;
register int c;
int c2;
while( (c = UpperToLower[*zPattern])!=0 ){
switch( c ){
case '%':
while( zPattern[1]=='%' ) zPattern++;
if( zPattern[1]==0 ) return 1;
c = UpperToLower[0xff & zPattern[1]];
case '%': {
while( (c=zPattern[1]) == '%' || c == '_' ){
if( c=='_' ){
while( *zString && sqliteLikeCompare(&zPattern[1],zString)==0 ){
zString++;
if( *zString==0 ) return 0;
zString += utf8_char_size(zString);
}
return *zString!=0;
}else{
while( (c2 = UpperToLower[*zString])!=0 ){
zPattern++;
}
if( c==0 ) return 1;
c = UpperToLower[c];
while( (c2=UpperToLower[*zString])!=0 ){
while( c2 != 0 && c2 != c ){ c2 = UpperToLower[*++zString]; }
if( c2==0 ) return 0;
if( sqliteLikeCompare(&zPattern[1],zString) ) return 1;
zString++;
zString += utf8_char_size(zString);
}
return 0;
}
case '_':
case '_': {
if( *zString==0 ) return 0;
break;
default:
if( c != UpperToLower[*zString] ) return 0;
zString += utf8_char_size(zString);
zPattern++;
break;
}
default: {
if( c != UpperToLower[*zString] ) return 0;
zPattern++;
zString++;
break;
}
}
}
return *zString==0;
}

View File

@ -23,7 +23,7 @@
# This file implements regression tests for SQLite library. The
# focus of this file is testing expressions.
#
# $Id: expr.test,v 1.10 2001/04/04 11:48:58 drh Exp $
# $Id: expr.test,v 1.11 2001/04/04 21:10:19 drh Exp $
set testdir [file dirname $argv0]
source $testdir/tester.tcl
@ -159,6 +159,22 @@ test_expr expr-5.10 {t1='abxyzzyc', t2='A%_C'} {t1 LIKE t2} 1
test_expr expr-5.11 {t1='abc', t2='xyz'} {t1 NOT LIKE t2} 1
test_expr expr-5.12 {t1='abc', t2='ABC'} {t1 NOT LIKE t2} 0
# The following tests only work on versions of TCL that support
# Unicode.
#
test_expr expr-5.13 "t1='a\u0080c', t2='A_C'" {t1 LIKE t2} 1
test_expr expr-5.14 "t1='a\u07FFc', t2='A_C'" {t1 LIKE t2} 1
test_expr expr-5.15 "t1='a\u0800c', t2='A_C'" {t1 LIKE t2} 1
test_expr expr-5.16 "t1='a\uFFFFc', t2='A_C'" {t1 LIKE t2} 1
test_expr expr-5.17 "t1='a\u0080', t2='A__'" {t1 LIKE t2} 0
test_expr expr-5.18 "t1='a\u07FF', t2='A__'" {t1 LIKE t2} 0
test_expr expr-5.19 "t1='a\u0800', t2='A__'" {t1 LIKE t2} 0
test_expr expr-5.20 "t1='a\uFFFF', t2='A__'" {t1 LIKE t2} 0
test_expr expr-5.21 "t1='ax\uABCD', t2='A_\uABCD'" {t1 LIKE t2} 1
test_expr expr-5.22 "t1='ax\u1234', t2='A%\u1234'" {t1 LIKE t2} 1
test_expr expr-5.23 "t1='ax\uFEDC', t2='A_%'" {t1 LIKE t2} 1
test_expr expr-5.24 "t1='ax\uFEDCy\uFEDC', t2='A%\uFEDC'" {t1 LIKE t2} 1
test_expr expr-6.1 {t1='abc', t2='xyz'} {t1 GLOB t2} 0
test_expr expr-6.2 {t1='abc', t2='ABC'} {t1 GLOB t2} 0
test_expr expr-6.3 {t1='abc', t2='A?C'} {t1 GLOB t2} 0
@ -185,6 +201,26 @@ test_expr expr-6.23 {t1='abcdefg', t2='a*?g'} {t1 GLOB t2} 1
test_expr expr-6.24 {t1='ac', t2='a*c'} {t1 GLOB t2} 1
test_expr expr-6.25 {t1='ac', t2='a*?c'} {t1 GLOB t2} 0
# These tests only work on versions of TCL that support Unicode
#
test_expr expr-6.26 "t1='a\u0080c', t2='a?c'" {t1 GLOB t2} 1
test_expr expr-6.27 "t1='a\u07ffc', t2='a?c'" {t1 GLOB t2} 1
test_expr expr-6.28 "t1='a\u0800c', t2='a?c'" {t1 GLOB t2} 1
test_expr expr-6.29 "t1='a\uffffc', t2='a?c'" {t1 GLOB t2} 1
test_expr expr-6.30 "t1='a\u1234', t2='a?'" {t1 GLOB t2} 1
test_expr expr-6.31 "t1='a\u1234', t2='a??'" {t1 GLOB t2} 0
test_expr expr-6.32 "t1='ax\u1234', t2='a?\u1234'" {t1 GLOB t2} 1
test_expr expr-6.33 "t1='ax\u1234', t2='a*\u1234'" {t1 GLOB t2} 1
test_expr expr-6.34 "t1='ax\u1234y\u1234', t2='a*\u1234'" {t1 GLOB t2} 1
test_expr expr-6.35 "t1='a\u1234b', t2='a\[x\u1234y\]b'" {t1 GLOB t2} 1
test_expr expr-6.36 "t1='a\u1234b', t2='a\[\u1233-\u1235\]b'" {t1 GLOB t2} 1
test_expr expr-6.37 "t1='a\u1234b', t2='a\[\u1234-\u124f\]b'" {t1 GLOB t2} 1
test_expr expr-6.38 "t1='a\u1234b', t2='a\[\u1235-\u124f\]b'" {t1 GLOB t2} 0
test_expr expr-6.39 "t1='a\u1234b', t2='a\[a-\u1235\]b'" {t1 GLOB t2} 1
test_expr expr-6.40 "t1='a\u1234b', t2='a\[a-\u1234\]b'" {t1 GLOB t2} 1
test_expr expr-6.41 "t1='a\u1234b', t2='a\[a-\u1233\]b'" {t1 GLOB t2} 0
# The sqliteExprIfFalse and sqliteExprIfTrue routines are only
# executed as part of a WHERE clause. Create a table suitable
# for testing these functions.

View File

@ -17,6 +17,13 @@ proc chng {date desc} {
puts "<DD><P><UL>$desc</UL></P></DD>"
}
chng {2001 Apr 5 (1.0.29)} {
<li>The LIKE and GLOB operators now assume both operands are
UTF-8 strings.
<font color="red">** This change could potentially
break existing code **</font></li>
}
chng {2001 Apr 4 (1.0.28)} {
<li>Added limited support for transactions. At this point, transactions
will do table locking on the GDBM backend. There is no support (yet)

View File

@ -1,7 +1,7 @@
#
# Run this TCL script to generate HTML for the index.html file.
#
set rcsid {$Id: index.tcl,v 1.33 2001/04/04 12:33:36 drh Exp $}
set rcsid {$Id: index.tcl,v 1.34 2001/04/04 21:10:19 drh Exp $}
puts {<html>
<head><title>SQLite: An SQL Database Library Built Atop GDBM</title></head>
@ -46,7 +46,8 @@ included.</li>
the <a href="http://www.google.com/search?q=gnu+readline+library">GNU
Readline library</a></li>
<li>A Tcl-based test suite provides near 100% code coverage</li>
<li>7500+ lines of C code. No external dependencies other than GDBM.</li>
<li>Approximately 9500 lines of C code. No external dependencies other
than GDBM.</li>
<li>Built and tested under Linux, HPUX, and WinNT.</li>
</ul>
</p>
@ -61,20 +62,16 @@ all code except for a few areas which are unreachable or which are
only reached when <tt>malloc()</tt> fails. The code has been tested
for memory leaks and is found to be clean.</p>
<p><b>Important Note:</b> A bug was found in the processing of UPDATE
statements when the WHERE clause contained some terms that could be
satisfied using indices and other terms which could not. The problem
was fixed in version 1.0.22. Users of prior versions of SQLite should
consider upgrading.</p>
<p><b>Important Note:</b> Beginning with version 1.0.29, the LIKE and
GLOB operators assume both operands are UTF-8 strings. Prior to that,
both operators assumed plain ASCII strings. Users of earlier versions
of SQLite that invoke LIKE or GLOB to compare strings containing
characters greater than 127 may have problems when they upgrade to
version 1.0.29 or later.</p>
<p>
Among the SQL features that SQLite does not currently implement are:</p>
<p>
<ul>
<li>constraints are parsed but are not enforced</li>
</ul>
</p>
<p><b>Important Note:</b> Serious bugs have been found in versions
1.0.22 on Unix and 1.0.26 on Windows. Users of these or earlier
versions of SQLite should upgrade.</p>
<h2>Documentation</h2>