From 35cd643cc73770e35b8e15db5ac54ea577790e7a Mon Sep 17 00:00:00 2001
From: drh <drh@noemail.net>
Date: Fri, 5 Jun 2009 14:17:21 +0000
Subject: [PATCH] Take care that a corrupt variable-length integer does not
 cause 32-bit integer overflow when parsing a record format, nor cause
 excessively large memory allocations. (CVS 6719)

FossilOrigin-Name: 38b20327a80996c7044b88be32161ac4ac0ec3a9
---
 manifest        | 22 +++++++-------
 manifest.uuid   |  2 +-
 src/btree.c     |  8 ++---
 src/sqliteInt.h | 10 +++++-
 src/util.c      | 12 ++++++--
 src/vdbe.c      | 81 ++++++++++++++++++++++++++++++++++++-------------
 src/vdbeInt.h   |  8 ++---
 src/vdbeaux.c   | 19 ++++++------
 8 files changed, 107 insertions(+), 55 deletions(-)

diff --git a/manifest b/manifest
index b1cd2913b7..ebe66662fe 100644
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Earlier\sdetection\sof\sfreelist\scorruption\sin\sthe\spage\sallocation\sroutines.\s(CVS\s6718)
-D 2009-06-04T19:06:10
+C Take\scare\sthat\sa\scorrupt\svariable-length\sinteger\sdoes\snot\scause\s32-bit\ninteger\soverflow\swhen\sparsing\sa\srecord\sformat,\snor\scause\sexcessively\slarge\nmemory\sallocations.\s(CVS\s6719)
+D 2009-06-05T14:17:22
 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
 F Makefile.in 8b8fb7823264331210cddf103831816c286ba446
 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@@ -106,7 +106,7 @@ F src/auth.c 98db07c2088455797678eb1031f42d4d94d18a71
 F src/backup.c ff50af53184a5fd7bdee4d620b5dabef74717c79
 F src/bitvec.c 0ef0651714728055d43de7a4cdd95e703fac0119
 F src/btmutex.c 9b899c0d8df3bd68f527b0afe03088321b696d3c
-F src/btree.c 17ab7af7d250ba51d3b76eaf8b3885cbd8d91f47
+F src/btree.c 3a0e52943bf32aba524e3811f4bea0a2e59b078f
 F src/btree.h f70b694e8c163227369a66863b01fbff9009f323
 F src/btreeInt.h df64030d632f8c8ac217ed52e8b6b3eacacb33a5
 F src/build.c 20e02fd72249159ff6829009f3029d16d59cdff5
@@ -162,7 +162,7 @@ F src/select.c 2d97084a176a63eabce2d043eb4fbb13c46d6e9f
 F src/shell.c db2643650b9268df89a4bedca3f1c6d9e786f1bb
 F src/sqlite.h.in 79210c4d8905cfb4b038486dde5f36fabb796a86
 F src/sqlite3ext.h 1db7d63ab5de4b3e6b83dd03d1a4e64fef6d2a17
-F src/sqliteInt.h 474e85cc85f78c18b8dbaec5cb786cdba6b45183
+F src/sqliteInt.h f8d70341d527404c5f162dc7fcc0f005700d0b48
 F src/sqliteLimit.h ffe93f5a0c4e7bd13e70cd7bf84cfb5c3465f45d
 F src/status.c 237b193efae0cf6ac3f0817a208de6c6c6ef6d76
 F src/table.c cc86ad3d6ad54df7c63a3e807b5783c90411a08d
@@ -201,13 +201,13 @@ F src/tokenize.c 75367c7e4d2aee39a3b0496911284b73de5b4363
 F src/trigger.c c07c5157c58fcdb704f65d5f5e4775276e45bb8b
 F src/update.c 6ae6c26adff8dc34532d578f66e6cfde04b5d177
 F src/utf.c 9541d28f40441812c0b40f00334372a0542c00ff
-F src/util.c a9719d309f6c65b3b79fa3ca8512fa8e3947a391
+F src/util.c 8ff385a6b474e840d4fa3621f5f7263028ac892c
 F src/vacuum.c 0e14f371ea3326c6b8cfba257286d798cd20db59
-F src/vdbe.c d105cc58a6a0fa08a3fade86633e90d57a7a8129
+F src/vdbe.c 7f8639cf36a0bb87a4e31bc31432f8af10c3b252
 F src/vdbe.h 35a648bc3279a120da24f34d9a25213ec15daf8a
-F src/vdbeInt.h 43183a2a18654fa570219ab65e53a608057c48ae
+F src/vdbeInt.h 3727128255a93d116e454f67d4559700f7ae4d6f
 F src/vdbeapi.c 86aa27a5f3493aaffb8ac051782aa3b22670d7ed
-F src/vdbeaux.c 37730f227a5301c04e5bf03fd303b9086ada990c
+F src/vdbeaux.c 78ff6c355ccc2d211350f507bccfcd95118169e8
 F src/vdbeblob.c c25d7e7bc6d5917feeb17270bd275fa771f26e5c
 F src/vdbemem.c 05183d46094aa99b8f8350e5761b9369dbef35a8
 F src/vtab.c e2f4c92df7d06330b151448718c4724742ff444b
@@ -733,7 +733,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
 F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
 F tool/vdbe-compress.tcl 672f81d693a03f80f5ae60bfefacd8a349e76746
-P 1335e4440f5a3d24ce9ce187e0e23fc9b166ca98
-R 6fa526d3fc2cd5b05a7afb7c2a31b3a3
+P e557c8e5846f9c4eaaeb3bd07614ac101bb0b3d0
+R aadc9c06bb2a87f3514e9a9c55bcfa54
 U drh
-Z 24570d1925b89f9799c0bf39f48794de
+Z 216b11c06f5be02fd997159248e30587
diff --git a/manifest.uuid b/manifest.uuid
index 1f1911f2aa..7ad184ffeb 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-e557c8e5846f9c4eaaeb3bd07614ac101bb0b3d0
\ No newline at end of file
+38b20327a80996c7044b88be32161ac4ac0ec3a9
\ No newline at end of file
diff --git a/src/btree.c b/src/btree.c
index c963f3d92e..c113f6d467 100644
--- a/src/btree.c
+++ b/src/btree.c
@@ -9,7 +9,7 @@
 **    May you share freely, never taking more than you give.
 **
 *************************************************************************
-** $Id: btree.c,v 1.617 2009/06/04 19:06:10 drh Exp $
+** $Id: btree.c,v 1.618 2009/06/05 14:17:22 drh Exp $
 **
 ** This file implements a external (disk-based) database using BTrees.
 ** See the header comment on "btreeInt.h" for additional information.
@@ -4393,7 +4393,7 @@ static int allocateBtreePage(
 ){
   MemPage *pPage1;
   int rc;
-  int n;     /* Number of pages on the freelist */
+  u32 n;     /* Number of pages on the freelist */
   int k;     /* Number of leaves on the trunk of the freelist */
   MemPage *pTrunk = 0;
   MemPage *pPrevTrunk = 0;
@@ -4458,10 +4458,6 @@ static int allocateBtreePage(
       }
 
       k = get4byte(&pTrunk->aData[4]);
-      if( k>mxPage ){
-        rc = SQLITE_CORRUPT_BKPT;
-        goto end_allocate_page;
-      }
       if( k==0 && !searchList ){
         /* The trunk has no leaves and the list is not being searched. 
         ** So extract the trunk page itself and use it as the newly 
diff --git a/src/sqliteInt.h b/src/sqliteInt.h
index eaf35872f7..0d6acda5d7 100644
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -11,7 +11,7 @@
 *************************************************************************
 ** Internal interface definitions for SQLite.
 **
-** @(#) $Id: sqliteInt.h,v 1.881 2009/06/02 21:31:39 drh Exp $
+** @(#) $Id: sqliteInt.h,v 1.882 2009/06/05 14:17:23 drh Exp $
 */
 #ifndef _SQLITEINT_H_
 #define _SQLITEINT_H_
@@ -431,6 +431,14 @@ typedef INT16_TYPE i16;            /* 2-byte signed integer */
 typedef UINT8_TYPE u8;             /* 1-byte unsigned integer */
 typedef INT8_TYPE i8;              /* 1-byte signed integer */
 
+/*
+** SQLITE_MAX_U32 is a u64 constant that is the maximum u64 value
+** that can be stored in a u32 without loss of data.  The value
+** is 0x00000000ffffffff.  But because of quirks of some compilers, we
+** have to specify the value in the less intuitive manner shown:
+*/
+#define SQLITE_MAX_U32  ((((u64)1)<<32)-1)
+
 /*
 ** Macros to determine whether the machine is big or little endian,
 ** evaluated at runtime.
diff --git a/src/util.c b/src/util.c
index 6d4ca2f441..1f22ccee01 100644
--- a/src/util.c
+++ b/src/util.c
@@ -14,7 +14,7 @@
 ** This file contains functions for allocating memory, comparing
 ** strings, and stuff like that.
 **
-** $Id: util.c,v 1.257 2009/05/31 21:21:41 drh Exp $
+** $Id: util.c,v 1.258 2009/06/05 14:17:23 drh Exp $
 */
 #include "sqliteInt.h"
 #include <stdarg.h>
@@ -766,6 +766,10 @@ u8 sqlite3GetVarint(const unsigned char *p, u64 *v){
 /*
 ** Read a 32-bit variable-length integer from memory starting at p[0].
 ** Return the number of bytes read.  The value is stored in *v.
+**
+** If the varint stored in p[0] is larger than can fit in a 32-bit unsigned
+** integer, then set *v to 0xffffffff.
+**
 ** A MACRO version, getVarint32, is provided which inlines the 
 ** single-byte case.  All code should use the MACRO version as 
 ** this function assumes the single-byte case has already been handled.
@@ -831,7 +835,11 @@ u8 sqlite3GetVarint32(const unsigned char *p, u32 *v){
     p -= 2;
     n = sqlite3GetVarint(p, &v64);
     assert( n>3 && n<=9 );
-    *v = (u32)v64;
+    if( (v64 & SQLITE_MAX_U32)!=v64 ){
+      *v = 0xffffffff;
+    }else{
+      *v = (u32)v64;
+    }
     return n;
   }
 
diff --git a/src/vdbe.c b/src/vdbe.c
index 6568a193cf..2d1e5978da 100644
--- a/src/vdbe.c
+++ b/src/vdbe.c
@@ -43,7 +43,7 @@
 ** in this file for details.  If in doubt, do not deviate from existing
 ** commenting and indentation practices when changing or adding code.
 **
-** $Id: vdbe.c,v 1.846 2009/06/03 11:25:07 danielk1977 Exp $
+** $Id: vdbe.c,v 1.847 2009/06/05 14:17:24 drh Exp $
 */
 #include "sqliteInt.h"
 #include "vdbeInt.h"
@@ -2002,7 +2002,7 @@ case OP_SetNumColumns: {
 ** the result.
 */
 case OP_Column: {
-  int payloadSize;   /* Number of bytes in the record */
+  u32 payloadSize;   /* Number of bytes in the record */
   i64 payloadSize64; /* Number of bytes in the record */
   int p1;            /* P1 value of the opcode */
   int p2;            /* column number to retrieve */
@@ -2017,11 +2017,12 @@ case OP_Column: {
   char *zData;       /* Part of the record being decoded */
   Mem *pDest;        /* Where to write the extracted value */
   Mem sMem;          /* For storing the record being decoded */
-  u8 *zIdx;         /* Index into header */
-  u8 *zEndHdr;      /* Pointer to first byte after the header */
-  int offset;       /* Offset into the data */
-  int szHdrSz;      /* Size of the header size field at start of record */
-  int avail;        /* Number of bytes of available data */
+  u8 *zIdx;          /* Index into header */
+  u8 *zEndHdr;       /* Pointer to first byte after the header */
+  u32 offset;        /* Offset into the data */
+  u64 offset64;      /* 64-bit offset.  64 bits needed to catch overflow */
+  int szHdr;         /* Size of the header size field at start of record */
+  int avail;         /* Number of bytes of available data */
 
 
   p1 = pOp->p1;
@@ -2063,9 +2064,13 @@ case OP_Column: {
       zRec = (char*)pC->aRow;
     }else if( pC->isIndex ){
       sqlite3BtreeKeySize(pCrsr, &payloadSize64);
-      payloadSize = (int)payloadSize64;
+      if( (payloadSize64 & SQLITE_MAX_U32)!=(u64)payloadSize64 ){
+        rc = SQLITE_CORRUPT_BKPT;
+        goto abort_due_to_error;
+      }
+      payloadSize = (u32)payloadSize64;
     }else{
-      sqlite3BtreeDataSize(pCrsr, (u32 *)&payloadSize);
+      sqlite3BtreeDataSize(pCrsr, &payloadSize);
     }
     nField = pC->nField;
   }else{
@@ -2084,7 +2089,8 @@ case OP_Column: {
     assert( pDest->flags&MEM_Null );
     goto op_column_out;
   }
-  if( payloadSize>db->aLimit[SQLITE_LIMIT_LENGTH] ){
+  assert( db->aLimit[SQLITE_LIMIT_LENGTH]>=0 );
+  if( payloadSize > (u32)db->aLimit[SQLITE_LIMIT_LENGTH] ){
     goto too_big;
   }
 
@@ -2117,7 +2123,8 @@ case OP_Column: {
       ** having to make additional calls to fetch the content portion of
       ** the record.
       */
-      if( avail>=payloadSize ){
+      assert( avail>=0 );
+      if( payloadSize <= (u32)avail ){
         zRec = zData;
         pC->aRow = (u8*)zData;
       }else{
@@ -2127,7 +2134,37 @@ case OP_Column: {
     /* The following assert is true in all cases accept when
     ** the database file has been corrupted externally.
     **    assert( zRec!=0 || avail>=payloadSize || avail>=9 ); */
-    szHdrSz = getVarint32((u8*)zData, offset);
+    szHdr = getVarint32((u8*)zData, offset);
+
+    /* Make sure a corrupt database has not given us an oversize header.
+    ** Do this now to avoid an oversize memory allocation.
+    **
+    ** Type entries can be between 1 and 5 bytes each.  But 4 and 5 byte
+    ** types use so much data space that there can only be 4096 and 32 of
+    ** them, respectively.  So the maximum header length results from a
+    ** 3-byte type for each of the maximum of 32768 columns plus three
+    ** extra bytes for the header length itself.  32768*3 + 3 = 98307.
+    */
+    if( offset > 98307 ){
+      rc = SQLITE_CORRUPT_BKPT;
+      goto op_column_out;
+    }
+
+    /* Compute in len the number of bytes of data we need to read in order
+    ** to get nField type values.  offset is an upper bound on this.  But
+    ** nField might be significantly less than the true number of columns
+    ** in the table, and in that case, 5*nField+3 might be smaller than offset.
+    ** We want to minimize len in order to limit the size of the memory
+    ** allocation, especially if a corrupt database file has caused offset
+    ** to be oversized. Offset is limited to 98307 above.  But 98307 might
+    ** still exceed Robson memory allocation limits on some configurations.
+    ** On systems that cannot tolerate large memory allocations, nField*5+3
+    ** will likely be much smaller since nField will likely be less than
+    ** 20 or so.  This insures that Robson memory allocation limits are
+    ** not exceeded even for corrupt database files.
+    */
+    len = nField*5 + 3;
+    if( len > offset ) len = offset;
 
     /* The KeyFetch() or DataFetch() above are fast and will get the entire
     ** record header in most cases.  But they will fail to get the complete
@@ -2135,28 +2172,29 @@ case OP_Column: {
     ** in the B-Tree.  When that happens, use sqlite3VdbeMemFromBtree() to
     ** acquire the complete header text.
     */
-    if( !zRec && avail<offset ){
+    if( !zRec && avail<len ){
       sMem.flags = 0;
       sMem.db = 0;
-      rc = sqlite3VdbeMemFromBtree(pCrsr, 0, offset, pC->isIndex, &sMem);
+      rc = sqlite3VdbeMemFromBtree(pCrsr, 0, len, pC->isIndex, &sMem);
       if( rc!=SQLITE_OK ){
         goto op_column_out;
       }
       zData = sMem.z;
     }
-    zEndHdr = (u8 *)&zData[offset];
-    zIdx = (u8 *)&zData[szHdrSz];
+    zEndHdr = (u8 *)&zData[len];
+    zIdx = (u8 *)&zData[szHdr];
 
     /* Scan the header and use it to fill in the aType[] and aOffset[]
     ** arrays.  aType[i] will contain the type integer for the i-th
     ** column and aOffset[i] will contain the offset from the beginning
     ** of the record to the start of the data for the i-th column
     */
+    offset64 = offset;
     for(i=0; i<nField; i++){
       if( zIdx<zEndHdr ){
-        aOffset[i] = offset;
+        aOffset[i] = (u32)offset64;
         zIdx += getVarint32(zIdx, aType[i]);
-        offset += sqlite3VdbeSerialTypeLen(aType[i]);
+        offset64 += sqlite3VdbeSerialTypeLen(aType[i]);
       }else{
         /* If i is less that nField, then there are less fields in this
         ** record than SetNumColumns indicated there are columns in the
@@ -2176,8 +2214,8 @@ case OP_Column: {
     ** of the record (when all fields present), then we must be dealing 
     ** with a corrupt database.
     */
-    if( (zIdx > zEndHdr)|| (offset > payloadSize)
-     || (zIdx==zEndHdr && offset!=payloadSize) ){
+    if( (zIdx > zEndHdr)|| (offset64 > payloadSize)
+     || (zIdx==zEndHdr && offset64!=(u64)payloadSize) ){
       rc = SQLITE_CORRUPT_BKPT;
       goto op_column_out;
     }
@@ -2869,7 +2907,8 @@ case OP_VerifyCookie: {
 ** a KeyInfo structure (P4_KEYINFO). If it is a pointer to a KeyInfo 
 ** structure, then said structure defines the content and collating 
 ** sequence of the index being opened. Otherwise, if P4 is an integer 
-** value, it is set to the number of columns in the table.
+** value, it is set to the number of columns in the table, or to the
+** largest index of any column of the table that is actually used.
 **
 ** This instruction works just like OpenRead except that it opens the cursor
 ** in read/write mode.  For a given table, there can be one or more read-only
diff --git a/src/vdbeInt.h b/src/vdbeInt.h
index a86ed6922c..65e2495704 100644
--- a/src/vdbeInt.h
+++ b/src/vdbeInt.h
@@ -15,7 +15,7 @@
 ** 6000 lines long) it was split up into several smaller files and
 ** this header information was factored out.
 **
-** $Id: vdbeInt.h,v 1.170 2009/05/04 11:42:30 danielk1977 Exp $
+** $Id: vdbeInt.h,v 1.171 2009/06/05 14:17:25 drh Exp $
 */
 #ifndef _VDBEINT_H_
 #define _VDBEINT_H_
@@ -338,10 +338,10 @@ int sqlite3VdbeCursorMoveto(VdbeCursor*);
 #if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE)
 void sqlite3VdbePrintOp(FILE*, int, Op*);
 #endif
-int sqlite3VdbeSerialTypeLen(u32);
+u32 sqlite3VdbeSerialTypeLen(u32);
 u32 sqlite3VdbeSerialType(Mem*, int);
-int sqlite3VdbeSerialPut(unsigned char*, int, Mem*, int);
-int sqlite3VdbeSerialGet(const unsigned char*, u32, Mem*);
+u32 sqlite3VdbeSerialPut(unsigned char*, int, Mem*, int);
+u32 sqlite3VdbeSerialGet(const unsigned char*, u32, Mem*);
 void sqlite3VdbeDeleteAuxData(VdbeFunc*, int);
 
 int sqlite2BtreeKeyCompare(BtCursor *, const void *, int, int, int *);
diff --git a/src/vdbeaux.c b/src/vdbeaux.c
index 711de8b5f9..c1b60b0b99 100644
--- a/src/vdbeaux.c
+++ b/src/vdbeaux.c
@@ -14,7 +14,7 @@
 ** to version 2.8.7, all this code was combined into the vdbe.c source file.
 ** But that file was getting too big so this subroutines were split out.
 **
-** $Id: vdbeaux.c,v 1.458 2009/05/29 19:00:13 drh Exp $
+** $Id: vdbeaux.c,v 1.459 2009/06/05 14:17:25 drh Exp $
 */
 #include "sqliteInt.h"
 #include "vdbeInt.h"
@@ -2120,7 +2120,7 @@ u32 sqlite3VdbeSerialType(Mem *pMem, int file_format){
 /*
 ** Return the length of the data corresponding to the supplied serial-type.
 */
-int sqlite3VdbeSerialTypeLen(u32 serial_type){
+u32 sqlite3VdbeSerialTypeLen(u32 serial_type){
   if( serial_type>=12 ){
     return (serial_type-12)/2;
   }else{
@@ -2200,14 +2200,14 @@ static u64 floatSwap(u64 in){
 ** of bytes in the zero-filled tail is included in the return value only
 ** if those bytes were zeroed in buf[].
 */ 
-int sqlite3VdbeSerialPut(u8 *buf, int nBuf, Mem *pMem, int file_format){
+u32 sqlite3VdbeSerialPut(u8 *buf, int nBuf, Mem *pMem, int file_format){
   u32 serial_type = sqlite3VdbeSerialType(pMem, file_format);
-  int len;
+  u32 len;
 
   /* Integer and Real */
   if( serial_type<=7 && serial_type>0 ){
     u64 v;
-    int i;
+    u32 i;
     if( serial_type==7 ){
       assert( sizeof(v)==sizeof(pMem->r) );
       memcpy(&v, &pMem->r, sizeof(v));
@@ -2233,8 +2233,9 @@ int sqlite3VdbeSerialPut(u8 *buf, int nBuf, Mem *pMem, int file_format){
     memcpy(buf, pMem->z, len);
     if( pMem->flags & MEM_Zero ){
       len += pMem->u.nZero;
-      if( len>nBuf ){
-        len = nBuf;
+      assert( nBuf>=0 );
+      if( len > (u32)nBuf ){
+        len = (u32)nBuf;
       }
       memset(&buf[pMem->n], 0, len-pMem->n);
     }
@@ -2249,7 +2250,7 @@ int sqlite3VdbeSerialPut(u8 *buf, int nBuf, Mem *pMem, int file_format){
 ** Deserialize the data blob pointed to by buf as serial type serial_type
 ** and store the result in pMem.  Return the number of bytes read.
 */ 
-int sqlite3VdbeSerialGet(
+u32 sqlite3VdbeSerialGet(
   const unsigned char *buf,     /* Buffer to deserialize from */
   u32 serial_type,              /* Serial type to deserialize */
   Mem *pMem                     /* Memory cell to write value into */
@@ -2327,7 +2328,7 @@ int sqlite3VdbeSerialGet(
       return 0;
     }
     default: {
-      int len = (serial_type-12)/2;
+      u32 len = (serial_type-12)/2;
       pMem->z = (char *)buf;
       pMem->n = len;
       pMem->xDel = 0;