diff --git a/manifest b/manifest index f2b0e41313..d485703eda 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sproblem\swith\stableapi.test\son\sWindows. -D 2011-03-24T17:37:24.174 +C Minor\schange\sto\ssqlite3Utf8Read()\sto\smake\sconsistent\swith\sREAD_UTF8()\susage\sand\savoid\simplementation\sdefined\susages\sof\s<<.\s\s\nAdded\ssome\sadditional\sUTF-8\stest\scases. +D 2011-03-24T17:43:18.990 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 27701a1653595a1f2187dc61c8117e00a6c1d50f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -228,7 +228,7 @@ F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9 F src/tokenize.c 604607d6813e9551cf5189d899e0a25c12681080 F src/trigger.c ec4813709e990a169b6923293e839fa5dfd64282 F src/update.c 81911be16ece3c3e7716aa18565b4814ec41f8b9 -F src/utf.c 1baeeac91707a4df97ccc6141ec0f808278af685 +F src/utf.c d83650c3ea08f7407bd9d0839d9885241c209c60 F src/util.c cd997077bad039efc0597eb027c929658f93c018 F src/vacuum.c 924bd1bcee2dfb05376f79845bd3b4cec7b54b2f F src/vdbe.c e3f37ca0afdd72e883475e2a32a06167df2810d0 @@ -281,6 +281,7 @@ F test/backup2.test b7c69f937c912e85ac8a5dbd1e1cf290302b2d49 F test/backup_ioerr.test 1f012e692f42c0442ae652443258f70e9f20fa38 F test/backup_malloc.test 7162d604ec2b4683c4b3799a48657fb8b5e2d450 F test/badutf.test d5360fc31f643d37a973ab0d8b4fb85799c3169f +F test/badutf2.test a47fda0d986d5325aa0ec2a0ebdd2d68db45e623 F test/between.test 16b1776c6323faadb097a52d673e8e3d8be7d070 F test/bigfile.test a8ec8073a20207456dab01a29ad9cde42b0dd103 F test/bigrow.test f0aeb7573dcb8caaafea76454be3ade29b7fc747 @@ -915,7 +916,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P b6e268fce12829f058f1dfa223731ec8479493f8 -R c7f011955e8b01d6bf0bfb373d4d196c 
+P 69fe0c873d702ef1d781453ee6ac2b1fb77fce48 +R 3655844fccb71eb3fdafa07a1f475145 U shaneh -Z fc0302ddf1dde83de45345457bb8eef1 +Z 4c379d73c236b4115e9dab5017415240 diff --git a/manifest.uuid b/manifest.uuid index 7551016267..44925bdb50 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -69fe0c873d702ef1d781453ee6ac2b1fb77fce48 \ No newline at end of file +7173b3929fae4e678223b0e978a2da7fa50a9005 \ No newline at end of file diff --git a/src/utf.c b/src/utf.c index 8312cf9337..95182694d3 100644 --- a/src/utf.c +++ b/src/utf.c @@ -167,7 +167,7 @@ int sqlite3Utf8Read( const unsigned char *zIn, /* First byte of UTF-8 character */ const unsigned char **pzNext /* Write first byte past UTF-8 char here */ ){ - int c; + unsigned int c; /* Same as READ_UTF8() above but without the zTerm parameter. ** For this routine, we assume the UTF8 string is always zero-terminated. @@ -410,15 +410,15 @@ int sqlite3Utf8CharLen(const char *zIn, int nByte){ ** This has the effect of making sure that the string is well-formed ** UTF-8. Miscoded characters are removed. ** -** The translation is done in-place (since it is impossible for the -** correct UTF-8 encoding to be longer than a malformed encoding). +** The translation is done in-place and aborted if the output +** overruns the input. */ int sqlite3Utf8To8(unsigned char *zIn){ unsigned char *zOut = zIn; unsigned char *zStart = zIn; u32 c; - while( zIn[0] ){ + while( zIn[0] && zOut<=zIn ){ c = sqlite3Utf8Read(zIn, (const u8**)&zIn); if( c!=0xfffd ){ WRITE_UTF8(zOut, c); diff --git a/test/badutf2.test b/test/badutf2.test new file mode 100644 index 0000000000..462e98892e --- /dev/null +++ b/test/badutf2.test @@ -0,0 +1,119 @@ +# 2011 March 15 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# This file implements regression tests for SQLite library. +# +# This file checks to make sure SQLite is able to gracefully +# handle malformed UTF-8. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +proc utf8_to_ustr2 {s} { + set r "" + foreach i [split $s ""] { + scan $i %c c + append r [format \\u%04.4X $c] + } + set r +} + +proc utf8_to_hstr {in} { + regsub -all -- {(..)} $in {%[format "%s" \1]} out + subst $out +} + +proc utf8_to_xstr {in} { + regsub -all -- {(..)} $in {\\\\x[format "%s" \1]} out + subst $out +} + +proc utf8_to_ustr {in} { + regsub -all -- {(..)} $in {\\\\u[format "%04.4X" 0x\1]} out + subst $out +} + +do_test badutf2-1.0 { + db close + forcedelete test.db + sqlite3 db test.db + db eval "PRAGMA encoding = 'UTF-8'" +} {} + +do_test badutf2-4.0 { + set S [sqlite3_prepare_v2 db "SELECT ?" -1 dummy] + sqlite3_expired $S +} {0} + +foreach { i len uval xstr ustr u2u } { +1 1 00 \x00 {} {} +2 1 01 \x01 "\\u0001" 01 +3 1 3F \x3F "\\u003F" 3F +4 1 7F \x7F "\\u007F" 7F +5 1 80 \x80 "\\u0080" C280 +6 1 C3BF \xFF "\\u00FF" C3BF +7 3 EFBFBD \xEF\xBF\xBD "\\uFFFD" {} +} { + + set hstr [ utf8_to_hstr $uval ] + + ifcapable bloblit { + if {$hstr != "%00"} { + do_test badutf2-2.1.$i { + set sql "SELECT '$hstr'=CAST(x'$uval' AS text) AS x;" + set res [ sqlite3_exec db $sql ] + lindex [ lindex $res 1] 1 + } {1} + do_test badutf2-2.2.$i { + set sql "SELECT CAST('$hstr' AS blob)=x'$uval' AS x;" + set res [ sqlite3_exec db $sql ] + lindex [ lindex $res 1] 1 + } {1} + } + do_test badutf2-2.3.$i { + set sql "SELECT hex(CAST(x'$uval' AS text)) AS x;" + set res [ sqlite3_exec db $sql ] + lindex [ lindex $res 1] 1 + } $uval + do_test badutf2-2.4.$i { + set sql "SELECT hex(CAST(x'$uval' AS text)) AS x;" + set res [ sqlite3_exec db $sql ] + lindex [ lindex $res 1] 1 + } $uval + } + + if {$hstr != "%00"} { + do_test badutf2-3.1.$i { + set sql "SELECT hex('$hstr') AS x;" + 
set res [ sqlite3_exec db $sql ] + lindex [ lindex $res 1] 1 + } $uval + } + + do_test badutf2-4.1.$i { + sqlite3_reset $S + sqlite3_bind_text $S 1 $xstr $len + sqlite3_step $S + utf8_to_ustr2 [ sqlite3_column_text $S 0 ] + } $ustr + + do_test badutf2-5.1.$i { + utf8_to_utf8 $uval + } $u2u + +} + +do_test badutf2-4.2 { + sqlite3_finalize $S +} {SQLITE_OK} + + +finish_test