mirror of
https://github.com/sqlite/sqlite.git
synced 2025-10-24 09:53:10 +03:00
The built-in substr() function applied to a BLOB counts bytes, not characters. (CVS 3997)
FossilOrigin-Name: 75d573080d03ee48fe88710f70c6875ff9cae19c
This commit is contained in:
15
manifest
15
manifest
@@ -1,5 +1,5 @@
|
||||
C Make\ssure\sthe\sparser\saborts\squickly\sfollowing\sa\ssyntax\serror.\s(CVS\s3996)
|
||||
D 2007-05-15T00:09:13
|
||||
C The\sbuilt-in\ssubstr()\sfunction\sapplied\sto\sa\sBLOB\scounts\sbytes,\snot\scharacters.\s(CVS\s3997)
|
||||
D 2007-05-15T01:13:47
|
||||
F Makefile.in 87b200ad9970907f76df734d29dff3d294c10935
|
||||
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
|
||||
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
|
||||
@@ -71,7 +71,7 @@ F src/date.c 6049db7d5a8fdf2c677ff7d58fa31d4f6593c988
|
||||
F src/delete.c 5c0d89b3ef7d48fe1f5124bfe8341f982747fe29
|
||||
F src/experimental.c 1b2d1a6cd62ecc39610e97670332ca073c50792b
|
||||
F src/expr.c bad06f783d2daf1884a922859c8e7427323e7dc3
|
||||
F src/func.c b6586f83c141859c103dd4954de66efc3fbd101b
|
||||
F src/func.c cdbe36f2a71b7e6835189414c2fb626a339e300f
|
||||
F src/hash.c 67b23e14f0257b69a3e8aa663e4eeadc1a2b6fd5
|
||||
F src/hash.h 1b3f7e2609141fd571f62199fc38687d262e9564
|
||||
F src/insert.c e595ca26805dfb3a9ebaabc28e7947c479f3b14d
|
||||
@@ -347,6 +347,7 @@ F test/speed2.test 53177056baf6556dcbdcf032bbdfc41c1aa74ded
|
||||
F test/sqllimits1.test b15a5784e47199d68fa1182157ba7e790f467d9f
|
||||
F test/subquery.test ae324ee928c5fb463a3ce08a8860d6e7f1ca5797
|
||||
F test/subselect.test 974e87f8fc91c5f00dd565316d396a5a6c3106c4
|
||||
F test/substr.test 9f26cfca74397b26ab217fb838c3d0549eb4bcf3
|
||||
F test/sync.test d05397b8f89f423dd6dba528692019ab036bc1c3
|
||||
F test/table.test dbdfd06aef054ad5aed8e57a782137d57d5c5528
|
||||
F test/tableapi.test 036575a98dcce7c92e9f39056839bbad8a715412
|
||||
@@ -468,7 +469,7 @@ F www/fullscanb.gif f7c94cb227f060511f8909e10f570157263e9a25
|
||||
F www/index-ex1-x-b.gif f9b1d85c3fa2435cf38b15970c7e3aa1edae23a3
|
||||
F www/index.tcl 8f500433525f34ef93427ba5f4f83fb5fde1e0e7
|
||||
F www/indirect1b1.gif adfca361d2df59e34f9c5cac52a670c2bfc303a1
|
||||
F www/lang.tcl 26a38c36ec6686aee0e46eeac90acb3aae373537
|
||||
F www/lang.tcl 8dde95ed76af71579a132c9bb730d3764056d3ff
|
||||
F www/lockingv3.tcl e52345bd20323bef6146bfce18ae0829b2b7c87d
|
||||
F www/mingw.tcl d96b451568c5d28545fefe0c80bee3431c73f69c
|
||||
F www/nulls.tcl ec35193f92485b87b90a994a01d0171b58823fcf
|
||||
@@ -490,7 +491,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
|
||||
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
|
||||
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
|
||||
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
|
||||
P af76928fc5891f9a222ac0c39f8b443a9707b9e3
|
||||
R 3e5c27a41d7feb2f3994d5008df8b7ba
|
||||
P d07cdd3c096c120d104ae13f7932c0a955324517
|
||||
R 2be0a4c9f659ac8ba502f4d27f744853
|
||||
U drh
|
||||
Z 9652c7d3c4c7e15de8852ab1114d57ec
|
||||
Z 429140db9719a9a8e8c56d1f7aa1fad7
|
||||
|
||||
@@ -1 +1 @@
|
||||
d07cdd3c096c120d104ae13f7932c0a955324517
|
||||
75d573080d03ee48fe88710f70c6875ff9cae19c
|
||||
26
src/func.c
26
src/func.c
@@ -16,7 +16,7 @@
|
||||
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
|
||||
** All other code has file scope.
|
||||
**
|
||||
** $Id: func.c,v 1.155 2007/05/12 06:11:12 danielk1977 Exp $
|
||||
** $Id: func.c,v 1.156 2007/05/15 01:13:47 drh Exp $
|
||||
*/
|
||||
#include "sqliteInt.h"
|
||||
#include <ctype.h>
|
||||
@@ -146,7 +146,14 @@ static void absFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
|
||||
}
|
||||
|
||||
/*
|
||||
** Implementation of the substr() function
|
||||
** Implementation of the substr() function.
|
||||
**
|
||||
** substr(x,p1,p2) returns p2 characters of x[] beginning with p1.
|
||||
** p1 is 1-indexed. So substr(x,1,1) returns the first character
|
||||
** of x. If x is text, then we actually count UTF-8 characters.
|
||||
** If x is a blob, then we count bytes.
|
||||
**
|
||||
** If p1 is negative, then we begin abs(p1) from the end of x[].
|
||||
*/
|
||||
static void substrFunc(
|
||||
sqlite3_context *context,
|
||||
@@ -157,14 +164,22 @@ static void substrFunc(
|
||||
const unsigned char *z2;
|
||||
int i;
|
||||
int len;
|
||||
int p0type;
|
||||
i64 p1, p2;
|
||||
|
||||
assert( argc==3 );
|
||||
p0type = sqlite3_value_type(argv[0]);
|
||||
if( p0type==SQLITE_BLOB ){
|
||||
len = sqlite3_value_bytes(argv[0]);
|
||||
z = sqlite3_value_blob(argv[0]);
|
||||
if( z==0 ) return;
|
||||
}else{
|
||||
z = sqlite3_value_text(argv[0]);
|
||||
if( z==0 ) return;
|
||||
for(len=0, z2=z; *z2; z2++){ if( (0xc0&*z2)!=0x80 ) len++; }
|
||||
}
|
||||
p1 = sqlite3_value_int(argv[1]);
|
||||
p2 = sqlite3_value_int(argv[2]);
|
||||
for(len=0, z2=z; *z2; z2++){ if( (0xc0&*z2)!=0x80 ) len++; }
|
||||
if( p1<0 ){
|
||||
p1 += len;
|
||||
if( p1<0 ){
|
||||
@@ -177,6 +192,7 @@ static void substrFunc(
|
||||
if( p1+p2>len ){
|
||||
p2 = len-p1;
|
||||
}
|
||||
if( p0type!=SQLITE_BLOB ){
|
||||
for(i=0; i<p1 && z[i]; i++){
|
||||
if( (z[i]&0xc0)==0x80 ) p1++;
|
||||
}
|
||||
@@ -187,6 +203,10 @@ static void substrFunc(
|
||||
while( z[i] && (z[i]&0xc0)==0x80 ){ i++; p2++; }
|
||||
if( p2<0 ) p2 = 0;
|
||||
sqlite3_result_text(context, (char*)&z[p1], p2, SQLITE_TRANSIENT);
|
||||
}else{
|
||||
if( p2<0 ) p2 = 0;
|
||||
sqlite3_result_blob(context, (char*)&z[p1], p2, SQLITE_TRANSIENT);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
103
test/substr.test
Normal file
103
test/substr.test
Normal file
@@ -0,0 +1,103 @@
|
||||
# 2007 May 14
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#***********************************************************************
|
||||
# This file implements regression tests for SQLite library. The
|
||||
# focus of this file is testing the built-in SUBSTR() function.
|
||||
#
|
||||
# $Id: substr.test,v 1.1 2007/05/15 01:13:47 drh Exp $
|
||||
|
||||
set testdir [file dirname $argv0]
|
||||
source $testdir/tester.tcl
|
||||
|
||||
# Create a table to work with.
|
||||
#
|
||||
execsql {
|
||||
CREATE TABLE t1(t text, b blob)
|
||||
}
|
||||
proc substr-test {id string i1 i2 result} {
|
||||
db eval {
|
||||
DELETE FROM t1;
|
||||
INSERT INTO t1(t) VALUES($string)
|
||||
}
|
||||
do_test substr-$id.1 [subst {
|
||||
execsql {
|
||||
SELECT substr(t, $i1, $i2) FROM t1
|
||||
}
|
||||
}] [list $result]
|
||||
set qstr '[string map {' ''} $string]'
|
||||
do_test substr-$id.2 [subst {
|
||||
execsql {
|
||||
SELECT substr($qstr, $i1, $i2)
|
||||
}
|
||||
}] [list $result]
|
||||
}
|
||||
proc subblob-test {id hex i1 i2 hexresult} {
|
||||
db eval "
|
||||
DELETE FROM t1;
|
||||
INSERT INTO t1(b) VALUES(x'$hex')
|
||||
"
|
||||
do_test substr-$id.1 [subst {
|
||||
execsql {
|
||||
SELECT hex(substr(b, $i1, $i2)) FROM t1
|
||||
}
|
||||
}] [list $hexresult]
|
||||
do_test substr-$id.2 [subst {
|
||||
execsql {
|
||||
SELECT hex(substr(x'$hex', $i1, $i2))
|
||||
}
|
||||
}] [list $hexresult]
|
||||
}
|
||||
|
||||
# Basic SUBSTR functionality
|
||||
#
|
||||
substr-test 1.1 abcdefg 1 1 a
|
||||
substr-test 1.2 abcdefg 2 1 b
|
||||
substr-test 1.3 abcdefg 1 2 ab
|
||||
substr-test 1.4 abcdefg 1 100 abcdefg
|
||||
substr-test 1.5 abcdefg 0 1 a
|
||||
substr-test 1.6 abcdefg -1 1 g
|
||||
substr-test 1.7 abcdefg -1 10 g
|
||||
substr-test 1.8 abcdefg -5 3 cde
|
||||
substr-test 1.9 abcdefg -7 3 abc
|
||||
substr-test 1.10 abcdefg -100 98 abcde
|
||||
|
||||
# Make sure everything works with long unicode characters
|
||||
#
|
||||
substr-test 2.1 \u1234\u2345\u3456 1 1 \u1234
|
||||
substr-test 2.2 \u1234\u2345\u3456 2 1 \u2345
|
||||
substr-test 2.3 \u1234\u2345\u3456 1 2 \u1234\u2345
|
||||
substr-test 2.4 \u1234\u2345\u3456 -1 1 \u3456
|
||||
substr-test 2.5 a\u1234b\u2345c\u3456c -5 3 b\u2345c
|
||||
|
||||
# Basic functionality for BLOBs
|
||||
#
|
||||
subblob-test 3.1 61626364656667 1 1 61
|
||||
subblob-test 3.2 61626364656667 2 1 62
|
||||
subblob-test 3.3 61626364656667 1 2 6162
|
||||
subblob-test 3.4 61626364656667 1 100 61626364656667
|
||||
subblob-test 3.5 61626364656667 0 1 61
|
||||
subblob-test 3.6 61626364656667 -1 1 67
|
||||
subblob-test 3.7 61626364656667 -1 10 67
|
||||
subblob-test 3.8 61626364656667 -5 3 636465
|
||||
subblob-test 3.9 61626364656667 -7 3 616263
|
||||
subblob-test 3.10 61626364656667 -100 98 6162636465
|
||||
|
||||
# If these blobs were strings, then they would contain multi-byte
|
||||
# characters. But since they are blobs, the substr indices refer
|
||||
# to bytes.
|
||||
#
|
||||
subblob-test 4.1 61E188B462E28D8563E3919663 1 1 61
|
||||
subblob-test 4.2 61E188B462E28D8563E3919663 2 1 E1
|
||||
subblob-test 4.3 61E188B462E28D8563E3919663 1 2 61E1
|
||||
subblob-test 4.4 61E188B462E28D8563E3919663 -2 1 96
|
||||
subblob-test 4.5 61E188B462E28D8563E3919663 -5 4 63E39196
|
||||
subblob-test 4.6 61E188B462E28D8563E3919663 -100 98 61E188B462E28D8563E391
|
||||
|
||||
finish_test
|
||||
@@ -1,7 +1,7 @@
|
||||
#
|
||||
# Run this Tcl script to generate the lang-*.html files.
|
||||
#
|
||||
set rcsid {$Id: lang.tcl,v 1.128 2007/04/06 11:26:00 drh Exp $}
|
||||
set rcsid {$Id: lang.tcl,v 1.129 2007/05/15 01:13:47 drh Exp $}
|
||||
source common.tcl
|
||||
|
||||
if {[llength $argv]>0} {
|
||||
@@ -1455,8 +1455,9 @@ that is running. Example: "2.8.0"</td>
|
||||
with the <i>Y</i>-th character and which is <i>Z</i> characters long.
|
||||
The left-most character of <i>X</i> is number 1. If <i>Y</i> is negative
|
||||
the first character of the substring is found by counting from the
|
||||
right rather than the left. If SQLite is configured to support UTF-8,
|
||||
then character indices refer to actual UTF-8 characters, not bytes.</td>
|
||||
right rather than the left. If <i>X</i> is a string
|
||||
then character indices refer to actual UTF-8 characters. If
|
||||
<i>X</i> is a BLOB then the indices refer to bytes.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
|
||||
Reference in New Issue
Block a user