Make sure that UTF16 to UTF8 conversions do not read past the end of the

UTF16 input buffer if the last two bytes of the UTF16 input happen to be the first half of a surrogate pair. Ticket [3fe897352e]. FossilOrigin-Name: 19064d7cea838e1a93fe63743ed247f440679e97
2025-11-09 14:21:03 +03:00 · 2009-10-23 18:15:46 +00:00
parent 9bd1b44944
commit 7c95b0f3da
5 changed files with 189 additions and 21 deletions
--- a/src/utf.c
+++ b/src/utf.c
@@ -107,20 +107,20 @@ static const unsigned char sqlite3Utf8Trans1[] = {
  }                                                                 \
 }

-#define READ_UTF16LE(zIn, c){                                         \
+#define READ_UTF16LE(zIn, zTerm, c){                                  \
  c = (*zIn++);                                                       \
  c += ((*zIn++)<<8);                                                 \
-  if( c>=0xD800 && c<0xE000 ){                                        \
+  if( c>=0xD800 && c<0xE000 && zIn<zTerm ){                           \
    int c2 = (*zIn++);                                                \
    c2 += ((*zIn++)<<8);                                              \
    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
  }                                                                   \
 }

-#define READ_UTF16BE(zIn, c){                                         \
+#define READ_UTF16BE(zIn, zTerm, c){                                  \
  c = ((*zIn++)<<8);                                                  \
  c += (*zIn++);                                                      \
-  if( c>=0xD800 && c<0xE000 ){                                        \
+  if( c>=0xD800 && c<0xE000 && zIn<zTerm ){                           \
    int c2 = ((*zIn++)<<8);                                           \
    c2 += (*zIn++);                                                   \
    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
@@ -305,13 +305,13 @@ int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
    if( pMem->enc==SQLITE_UTF16LE ){
      /* UTF-16 Little-endian -> UTF-8 */
      while( zIn<zTerm ){
-        READ_UTF16LE(zIn, c); 
+        READ_UTF16LE(zIn, zTerm, c); 
        WRITE_UTF8(z, c);
      }
    }else{
      /* UTF-16 Big-endian -> UTF-8 */
      while( zIn<zTerm ){
-        READ_UTF16BE(zIn, c); 
+        READ_UTF16BE(zIn, zTerm, c); 
        WRITE_UTF8(z, c);
      }
    }
@@ -488,6 +488,7 @@ char *sqlite3Utf8to16(sqlite3 *db, u8 enc, char *z, int n, int *pnOut){
 int sqlite3Utf16ByteLen(const void *zIn, int nChar){
  int c;
  unsigned char const *z = zIn;
+  unsigned char const *zTerm = &z[nChar];
  int n = 0;
  if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
    /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here
@@ -500,12 +501,12 @@ int sqlite3Utf16ByteLen(const void *zIn, int nChar){
    ** penalty is paid for this "if" statement.
    */
    while( n<nChar ){
-      READ_UTF16BE(z, c);
+      READ_UTF16BE(z, zTerm, c);
      n++;
    }
  }else{
    while( n<nChar ){
-      READ_UTF16LE(z, c);
+      READ_UTF16LE(z, zTerm, c);
      n++;
    }
  }
@@ -547,7 +548,7 @@ void sqlite3UtfSelfTest(void){
    assert( n>0 && n<=4 );
    z[0] = 0;
    z = zBuf;
-    READ_UTF16LE(z, c);
+    READ_UTF16LE(z, &zBuf[n], c);
    assert( c==i );
    assert( (z-zBuf)==n );
  }
@@ -559,7 +560,7 @@ void sqlite3UtfSelfTest(void){
    assert( n>0 && n<=4 );
    z[0] = 0;
    z = zBuf;
-    READ_UTF16BE(z, c);
+    READ_UTF16BE(z, &zBuf[n], c);
    assert( c==i );
    assert( (z-zBuf)==n );
  }