Minor change to sqlite3Utf8Read() to make consistent with READ_UTF8() usage and avoid implementation defined usages of <<.

Added some additional UTF-8 test cases. FossilOrigin-Name: 7173b3929fae4e678223b0e978a2da7fa50a9005
2025-11-09 14:21:03 +03:00 · 2011-03-24 17:43:18 +00:00
parent d71245c2ad
commit dba2cc43c4
4 changed files with 131 additions and 11 deletions
--- a/src/utf.c
+++ b/src/utf.c
@@ -167,7 +167,7 @@ int sqlite3Utf8Read(
  const unsigned char *zIn,       /* First byte of UTF-8 character */
  const unsigned char **pzNext    /* Write first byte past UTF-8 char here */
 ){
-  int c;
+  unsigned int c;

  /* Same as READ_UTF8() above but without the zTerm parameter.
  ** For this routine, we assume the UTF8 string is always zero-terminated.
@@ -410,15 +410,15 @@ int sqlite3Utf8CharLen(const char *zIn, int nByte){
 ** This has the effect of making sure that the string is well-formed
 ** UTF-8.  Miscoded characters are removed.
 **
-** The translation is done in-place (since it is impossible for the
-** correct UTF-8 encoding to be longer than a malformed encoding).
+** The translation is done in-place and aborted if the output
+** overruns the input.
 */
 int sqlite3Utf8To8(unsigned char *zIn){
  unsigned char *zOut = zIn;
  unsigned char *zStart = zIn;
  u32 c;

-  while( zIn[0] ){
+  while( zIn[0] && zOut<=zIn ){
    c = sqlite3Utf8Read(zIn, (const u8**)&zIn);
    if( c!=0xfffd ){
      WRITE_UTF8(zOut, c);