1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-10-24 09:53:10 +03:00

The built-in substr() function applied to a BLOB counts bytes, not characters. (CVS 3997)

FossilOrigin-Name: 75d573080d03ee48fe88710f70c6875ff9cae19c
This commit is contained in:
drh
2007-05-15 01:13:47 +00:00
parent 2a9abf61c7
commit f764e6fc3d
5 changed files with 150 additions and 25 deletions

View File

@@ -1,5 +1,5 @@
C Make\ssure\sthe\sparser\saborts\squickly\sfollowing\sa\ssyntax\serror.\s(CVS\s3996) C The\sbuilt-in\ssubstr()\sfunction\sapplied\sto\sa\sBLOB\scounts\sbytes,\snot\scharacters.\s(CVS\s3997)
D 2007-05-15T00:09:13 D 2007-05-15T01:13:47
F Makefile.in 87b200ad9970907f76df734d29dff3d294c10935 F Makefile.in 87b200ad9970907f76df734d29dff3d294c10935
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935 F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@@ -71,7 +71,7 @@ F src/date.c 6049db7d5a8fdf2c677ff7d58fa31d4f6593c988
F src/delete.c 5c0d89b3ef7d48fe1f5124bfe8341f982747fe29 F src/delete.c 5c0d89b3ef7d48fe1f5124bfe8341f982747fe29
F src/experimental.c 1b2d1a6cd62ecc39610e97670332ca073c50792b F src/experimental.c 1b2d1a6cd62ecc39610e97670332ca073c50792b
F src/expr.c bad06f783d2daf1884a922859c8e7427323e7dc3 F src/expr.c bad06f783d2daf1884a922859c8e7427323e7dc3
F src/func.c b6586f83c141859c103dd4954de66efc3fbd101b F src/func.c cdbe36f2a71b7e6835189414c2fb626a339e300f
F src/hash.c 67b23e14f0257b69a3e8aa663e4eeadc1a2b6fd5 F src/hash.c 67b23e14f0257b69a3e8aa663e4eeadc1a2b6fd5
F src/hash.h 1b3f7e2609141fd571f62199fc38687d262e9564 F src/hash.h 1b3f7e2609141fd571f62199fc38687d262e9564
F src/insert.c e595ca26805dfb3a9ebaabc28e7947c479f3b14d F src/insert.c e595ca26805dfb3a9ebaabc28e7947c479f3b14d
@@ -347,6 +347,7 @@ F test/speed2.test 53177056baf6556dcbdcf032bbdfc41c1aa74ded
F test/sqllimits1.test b15a5784e47199d68fa1182157ba7e790f467d9f F test/sqllimits1.test b15a5784e47199d68fa1182157ba7e790f467d9f
F test/subquery.test ae324ee928c5fb463a3ce08a8860d6e7f1ca5797 F test/subquery.test ae324ee928c5fb463a3ce08a8860d6e7f1ca5797
F test/subselect.test 974e87f8fc91c5f00dd565316d396a5a6c3106c4 F test/subselect.test 974e87f8fc91c5f00dd565316d396a5a6c3106c4
F test/substr.test 9f26cfca74397b26ab217fb838c3d0549eb4bcf3
F test/sync.test d05397b8f89f423dd6dba528692019ab036bc1c3 F test/sync.test d05397b8f89f423dd6dba528692019ab036bc1c3
F test/table.test dbdfd06aef054ad5aed8e57a782137d57d5c5528 F test/table.test dbdfd06aef054ad5aed8e57a782137d57d5c5528
F test/tableapi.test 036575a98dcce7c92e9f39056839bbad8a715412 F test/tableapi.test 036575a98dcce7c92e9f39056839bbad8a715412
@@ -468,7 +469,7 @@ F www/fullscanb.gif f7c94cb227f060511f8909e10f570157263e9a25
F www/index-ex1-x-b.gif f9b1d85c3fa2435cf38b15970c7e3aa1edae23a3 F www/index-ex1-x-b.gif f9b1d85c3fa2435cf38b15970c7e3aa1edae23a3
F www/index.tcl 8f500433525f34ef93427ba5f4f83fb5fde1e0e7 F www/index.tcl 8f500433525f34ef93427ba5f4f83fb5fde1e0e7
F www/indirect1b1.gif adfca361d2df59e34f9c5cac52a670c2bfc303a1 F www/indirect1b1.gif adfca361d2df59e34f9c5cac52a670c2bfc303a1
F www/lang.tcl 26a38c36ec6686aee0e46eeac90acb3aae373537 F www/lang.tcl 8dde95ed76af71579a132c9bb730d3764056d3ff
F www/lockingv3.tcl e52345bd20323bef6146bfce18ae0829b2b7c87d F www/lockingv3.tcl e52345bd20323bef6146bfce18ae0829b2b7c87d
F www/mingw.tcl d96b451568c5d28545fefe0c80bee3431c73f69c F www/mingw.tcl d96b451568c5d28545fefe0c80bee3431c73f69c
F www/nulls.tcl ec35193f92485b87b90a994a01d0171b58823fcf F www/nulls.tcl ec35193f92485b87b90a994a01d0171b58823fcf
@@ -490,7 +491,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5 F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
P af76928fc5891f9a222ac0c39f8b443a9707b9e3 P d07cdd3c096c120d104ae13f7932c0a955324517
R 3e5c27a41d7feb2f3994d5008df8b7ba R 2be0a4c9f659ac8ba502f4d27f744853
U drh U drh
Z 9652c7d3c4c7e15de8852ab1114d57ec Z 429140db9719a9a8e8c56d1f7aa1fad7

View File

@@ -1 +1 @@
d07cdd3c096c120d104ae13f7932c0a955324517 75d573080d03ee48fe88710f70c6875ff9cae19c

View File

@@ -16,7 +16,7 @@
** sqliteRegisterBuildinFunctions() found at the bottom of the file. ** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope. ** All other code has file scope.
** **
** $Id: func.c,v 1.155 2007/05/12 06:11:12 danielk1977 Exp $ ** $Id: func.c,v 1.156 2007/05/15 01:13:47 drh Exp $
*/ */
#include "sqliteInt.h" #include "sqliteInt.h"
#include <ctype.h> #include <ctype.h>
@@ -146,7 +146,14 @@ static void absFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
} }
/* /*
** Implementation of the substr() function ** Implementation of the substr() function.
**
** substr(x,p1,p2) returns p2 characters of x[] beginning with p1.
** p1 is 1-indexed. So substr(x,1,1) returns the first character
** of x. If x is text, then we actually count UTF-8 characters.
** If x is a blob, then we count bytes.
**
** If p1 is negative, then we begin abs(p1) from the end of x[].
*/ */
static void substrFunc( static void substrFunc(
sqlite3_context *context, sqlite3_context *context,
@@ -157,14 +164,22 @@ static void substrFunc(
const unsigned char *z2; const unsigned char *z2;
int i; int i;
int len; int len;
int p0type;
i64 p1, p2; i64 p1, p2;
assert( argc==3 ); assert( argc==3 );
p0type = sqlite3_value_type(argv[0]);
if( p0type==SQLITE_BLOB ){
len = sqlite3_value_bytes(argv[0]);
z = sqlite3_value_blob(argv[0]);
if( z==0 ) return;
}else{
z = sqlite3_value_text(argv[0]); z = sqlite3_value_text(argv[0]);
if( z==0 ) return; if( z==0 ) return;
for(len=0, z2=z; *z2; z2++){ if( (0xc0&*z2)!=0x80 ) len++; }
}
p1 = sqlite3_value_int(argv[1]); p1 = sqlite3_value_int(argv[1]);
p2 = sqlite3_value_int(argv[2]); p2 = sqlite3_value_int(argv[2]);
for(len=0, z2=z; *z2; z2++){ if( (0xc0&*z2)!=0x80 ) len++; }
if( p1<0 ){ if( p1<0 ){
p1 += len; p1 += len;
if( p1<0 ){ if( p1<0 ){
@@ -177,6 +192,7 @@ static void substrFunc(
if( p1+p2>len ){ if( p1+p2>len ){
p2 = len-p1; p2 = len-p1;
} }
if( p0type!=SQLITE_BLOB ){
for(i=0; i<p1 && z[i]; i++){ for(i=0; i<p1 && z[i]; i++){
if( (z[i]&0xc0)==0x80 ) p1++; if( (z[i]&0xc0)==0x80 ) p1++;
} }
@@ -187,6 +203,10 @@ static void substrFunc(
while( z[i] && (z[i]&0xc0)==0x80 ){ i++; p2++; } while( z[i] && (z[i]&0xc0)==0x80 ){ i++; p2++; }
if( p2<0 ) p2 = 0; if( p2<0 ) p2 = 0;
sqlite3_result_text(context, (char*)&z[p1], p2, SQLITE_TRANSIENT); sqlite3_result_text(context, (char*)&z[p1], p2, SQLITE_TRANSIENT);
}else{
if( p2<0 ) p2 = 0;
sqlite3_result_blob(context, (char*)&z[p1], p2, SQLITE_TRANSIENT);
}
} }
/* /*

103
test/substr.test Normal file
View File

@@ -0,0 +1,103 @@
# 2007 May 14
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library. The
# focus of this file is testing the built-in SUBSTR() functions.
#
# $Id: substr.test,v 1.1 2007/05/15 01:13:47 drh Exp $
# Locate the directory that holds this script and load the shared test
# harness (tester.tcl) from it.  The harness presumably supplies the
# execsql, do_test and finish_test commands used below.
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# Create a table to work with.  Column t receives the TEXT inputs and
# column b receives the BLOB inputs inserted by the helper procs.
#
execsql {
CREATE TABLE t1(t text, b blob)
}
# Run two checks of substr() applied to a TEXT value.
#
#   id      suffix used to form the test names substr-$id.1 and substr-$id.2
#   string  the text passed as the first argument of substr()
#   i1      second argument of substr()
#   i2      third argument of substr()
#   result  the value substr() is expected to return
#
# Test .1 stores the string in column t of t1 and reads it from the table.
# Test .2 passes the same string to substr() as an SQL string literal.
proc substr-test {id string i1 i2 result} {
# Reset t1 so that it holds exactly one row containing the string.
# The SQL is brace-quoted, so Tcl does not substitute $string here;
# presumably the db command binds it as an SQL variable.
db eval {
DELETE FROM t1;
INSERT INTO t1(t) VALUES($string)
}
# subst fills in $i1 and $i2 before the script is handed to do_test.
do_test substr-$id.1 [subst {
execsql {
SELECT substr(t, $i1, $i2) FROM t1
}
}] [list $result]
# Build an SQL string literal: double every single quote in the string
# and wrap the whole thing in single quotes.
set qstr '[string map {' ''} $string]'
do_test substr-$id.2 [subst {
execsql {
SELECT substr($qstr, $i1, $i2)
}
}] [list $result]
}
# Run two checks of substr() applied to a BLOB value.
#
#   id         suffix used to form the test names substr-$id.1 and substr-$id.2
#   hex        the blob contents written as hexadecimal digits
#   i1         second argument of substr()
#   i2         third argument of substr()
#   hexresult  expected result, as returned by hex()
#
# Test .1 stores the blob in column b of t1 and reads it from the table.
# Test .2 passes the same blob to substr() as an x'...' blob literal.
# Both wrap the call in hex() so the result is compared as hex digits.
proc subblob-test {id hex i1 i2 hexresult} {
# The SQL is double-quoted, so Tcl substitutes $hex into the x'...'
# literal before db eval sees the statement.
db eval "
DELETE FROM t1;
INSERT INTO t1(b) VALUES(x'$hex')
"
# subst fills in $i1 and $i2 before the script is handed to do_test.
do_test substr-$id.1 [subst {
execsql {
SELECT hex(substr(b, $i1, $i2)) FROM t1
}
}] [list $hexresult]
do_test substr-$id.2 [subst {
execsql {
SELECT hex(substr(x'$hex', $i1, $i2))
}
}] [list $hexresult]
}
# Basic SUBSTR functionality
#
# Arguments to substr-test are: id, input string, start, length, expected.
# Cases 1.5 through 1.10 cover a zero start, negative starts (counted
# from the end of the string) and lengths that run past either end.
#
substr-test 1.1 abcdefg 1 1 a
substr-test 1.2 abcdefg 2 1 b
substr-test 1.3 abcdefg 1 2 ab
substr-test 1.4 abcdefg 1 100 abcdefg
substr-test 1.5 abcdefg 0 1 a
substr-test 1.6 abcdefg -1 1 g
substr-test 1.7 abcdefg -1 10 g
substr-test 1.8 abcdefg -5 3 cde
substr-test 1.9 abcdefg -7 3 abc
substr-test 1.10 abcdefg -100 98 abcde
# Make sure everything works with long unicode characters
#
# Each \uXXXX escape here is a single character, so the expected
# results count characters rather than bytes.
#
substr-test 2.1 \u1234\u2345\u3456 1 1 \u1234
substr-test 2.2 \u1234\u2345\u3456 2 1 \u2345
substr-test 2.3 \u1234\u2345\u3456 1 2 \u1234\u2345
substr-test 2.4 \u1234\u2345\u3456 -1 1 \u3456
substr-test 2.5 a\u1234b\u2345c\u3456c -5 3 b\u2345c
# Basic functionality for BLOBs
#
# Arguments to subblob-test are: id, input hex, start, length, expected hex.
# The blob 61626364656667 is the ASCII bytes of abcdefg, so these cases
# mirror 1.1 through 1.10 above.
#
subblob-test 3.1 61626364656667 1 1 61
subblob-test 3.2 61626364656667 2 1 62
subblob-test 3.3 61626364656667 1 2 6162
subblob-test 3.4 61626364656667 1 100 61626364656667
subblob-test 3.5 61626364656667 0 1 61
subblob-test 3.6 61626364656667 -1 1 67
subblob-test 3.7 61626364656667 -1 10 67
subblob-test 3.8 61626364656667 -5 3 636465
subblob-test 3.9 61626364656667 -7 3 616263
subblob-test 3.10 61626364656667 -100 98 6162636465
# If these blobs were strings, then they would contain multi-byte
# characters. But since they are blobs, the substr indices refer
# to bytes.
#
# The 13 bytes are the UTF-8 encoding of the string used in test 2.5:
# 61, E188B4 (U+1234), 62, E28D85 (U+2345), 63, E39196 (U+3456), 63.
#
subblob-test 4.1 61E188B462E28D8563E3919663 1 1 61
subblob-test 4.2 61E188B462E28D8563E3919663 2 1 E1
subblob-test 4.3 61E188B462E28D8563E3919663 1 2 61E1
subblob-test 4.4 61E188B462E28D8563E3919663 -2 1 96
subblob-test 4.5 61E188B462E28D8563E3919663 -5 4 63E39196
subblob-test 4.6 61E188B462E28D8563E3919663 -100 98 61E188B462E28D8563E391
finish_test

View File

@@ -1,7 +1,7 @@
# #
# Run this Tcl script to generate the lang-*.html files. # Run this Tcl script to generate the lang-*.html files.
# #
set rcsid {$Id: lang.tcl,v 1.128 2007/04/06 11:26:00 drh Exp $} set rcsid {$Id: lang.tcl,v 1.129 2007/05/15 01:13:47 drh Exp $}
source common.tcl source common.tcl
if {[llength $argv]>0} { if {[llength $argv]>0} {
@@ -1455,8 +1455,9 @@ that is running. Example: "2.8.0"</td>
with the <i>Y</i>-th character and which is <i>Z</i> characters long. with the <i>Y</i>-th character and which is <i>Z</i> characters long.
The left-most character of <i>X</i> is number 1. If <i>Y</i> is negative The left-most character of <i>X</i> is number 1. If <i>Y</i> is negative
the first character of the substring is found by counting from the the first character of the substring is found by counting from the
right rather than the left. If SQLite is configured to support UTF-8, right rather than the left. If <i>X</i> is a string
then characters indices refer to actual UTF-8 characters, not bytes.</td> then character indices refer to actual UTF-8 characters. If
<i>X</i> is a BLOB then the indices refer to bytes.</td>
</tr> </tr>
<tr> <tr>