1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-11-09 14:21:03 +03:00

Rework the text to numeric conversion routines so that they work with either
UTF8 or UTF16 and do not require a NUL terminator.  This allows text to
numeric conversion without reallocating the string.

FossilOrigin-Name: 14eed3a0e0a45c6f2904a3a134aa27c159916f7b
This commit is contained in:
drh
2010-09-30 00:50:49 +00:00
parent fac2bd452a
commit 9339da1f22
11 changed files with 134 additions and 161 deletions

View File

@@ -238,61 +238,19 @@ int sqlite3_strnicmp(const char *zLeft, const char *zRight, int N){
}
/*
** Return TRUE if z is a pure numeric string.  Return FALSE and leave
** *realnum unchanged if the string contains any character which is not
** part of a number.
**
** z is text in the encoding given by enc and must be zero-terminated.
** For SQLITE_UTF8 every byte is examined.  For any other encoding the
** scan advances two bytes at a time and looks at only one byte of each
** pair (the second byte for SQLITE_UTF16BE, the first byte otherwise);
** the other byte of the pair is not inspected.
**
** If the string is pure numeric, set *realnum to TRUE if the string
** contains the '.' character or an "E+000" style exponentiation suffix.
** Otherwise set *realnum to FALSE.  Note that just because *realnum is
** false does not mean that the number can be successfully converted into
** an integer - it might be too big.
**
** An empty string is considered non-numeric.
*/
int sqlite3IsNumber(const char *z, int *realnum, u8 enc){
  int incr = (enc==SQLITE_UTF8?1:2);  /* bytes to advance per character */
  int isReal = 0;                     /* true once '.' or an exponent is seen */

  if( enc==SQLITE_UTF16BE ) z++;      /* examine the second byte of each pair */

  /* optional sign */
  if( *z=='-' || *z=='+' ) z += incr;

  /* at least one digit is required */
  if( !sqlite3Isdigit(*z) ){
    return 0;
  }
  z += incr;
  while( sqlite3Isdigit(*z) ){ z += incr; }

#ifndef SQLITE_OMIT_FLOATING_POINT
  /* optional fractional part: '.' followed by one or more digits */
  if( *z=='.' ){
    z += incr;
    if( !sqlite3Isdigit(*z) ) return 0;
    while( sqlite3Isdigit(*z) ){ z += incr; }
    isReal = 1;
  }

  /* optional exponent: 'e' or 'E', optional sign, one or more digits */
  if( *z=='e' || *z=='E' ){
    z += incr;
    if( *z=='+' || *z=='-' ) z += incr;
    if( !sqlite3Isdigit(*z) ) return 0;
    while( sqlite3Isdigit(*z) ){ z += incr; }
    isReal = 1;
  }
#endif

  /* Anything left before the terminator means the text is not a number.
  ** *realnum is written only after the whole string has been accepted so
  ** that it really is left unchanged on failure, as documented above. */
  if( *z!=0 ) return 0;
  *realnum = isReal;
  return 1;
}
/*
** The string z[] is an ASCII representation of a real number.
** The string z[] is a text representation of a real number.
** Convert this string to a double.
**
** This routine assumes that z[] really is a valid number. If it
** is not, the result is undefined.
** The string z[] is length bytes in length (bytes, not characters) and
** uses the encoding enc. The string is not necessarily zero-terminated.
**
** This routine is used instead of the library atof() function because
** the library atof() might want to use "," as the decimal point instead
** of "." depending on how locale is set. But that would cause problems
** for SQL. So this routine always uses "." regardless of locale.
** Return TRUE if the result is a valid real number (or integer) and FALSE
** if the string is empty or contains extraneous text.
*/
int sqlite3AtoF(const char *z, double *pResult){
int sqlite3AtoF(const char *z, double *pResult, int length, u8 enc){
#ifndef SQLITE_OMIT_FLOATING_POINT
const char *zBegin = z;
int incr = (enc==SQLITE_UTF8?1:2);
const char *zEnd = z + length;
/* sign * significand * (10 ^ (esign * exponent)) */
int sign = 1; /* sign of significand */
i64 s = 0; /* significand */
@@ -302,57 +260,70 @@ int sqlite3AtoF(const char *z, double *pResult){
double result;
int nDigits = 0;
if( enc==SQLITE_UTF16BE ) z++;
/* skip leading spaces */
while( sqlite3Isspace(*z) ) z++;
while( z<zEnd && sqlite3Isspace(*z) ) z+=incr;
if( z>=zEnd ){
*pResult = 0.0;
return 0;
}
/* get sign of significand */
if( *z=='-' ){
sign = -1;
z++;
z+=incr;
}else if( *z=='+' ){
z++;
z+=incr;
}
/* skip leading zeroes */
while( z[0]=='0' ) z++, nDigits++;
while( z<zEnd && z[0]=='0' ) z+=incr, nDigits++;
/* copy max significant digits to significand */
while( sqlite3Isdigit(*z) && s<((LARGEST_INT64-9)/10) ){
while( z<zEnd && sqlite3Isdigit(*z) && s<((LARGEST_INT64-9)/10) ){
s = s*10 + (*z - '0');
z++, nDigits++;
z+=incr, nDigits++;
}
/* skip non-significant significand digits
** (increase exponent by d to shift decimal left) */
while( sqlite3Isdigit(*z) ) z++, nDigits++, d++;
while( z<zEnd && sqlite3Isdigit(*z) ) z+=incr, nDigits++, d++;
if( z>=zEnd ) goto do_atof_calc;
/* if decimal point is present */
if( *z=='.' ){
z++;
z+=incr;
/* copy digits from after decimal to significand
** (decrease exponent by d to shift decimal right) */
while( sqlite3Isdigit(*z) && s<((LARGEST_INT64-9)/10) ){
while( z<zEnd && sqlite3Isdigit(*z) && s<((LARGEST_INT64-9)/10) ){
s = s*10 + (*z - '0');
z++, nDigits++, d--;
z+=incr, nDigits++, d--;
}
/* skip non-significant digits */
while( sqlite3Isdigit(*z) ) z++, nDigits++;
while( z<zEnd && sqlite3Isdigit(*z) ) z+=incr, nDigits++;
}
if( z>=zEnd ) goto do_atof_calc;
/* if exponent is present */
if( *z=='e' || *z=='E' ){
z++;
z+=incr;
if( z>=zEnd ) goto do_atof_calc;
/* get sign of exponent */
if( *z=='-' ){
esign = -1;
z++;
z+=incr;
}else if( *z=='+' ){
z++;
z+=incr;
}
/* copy digits to exponent */
while( sqlite3Isdigit(*z) ){
while( z<zEnd && sqlite3Isdigit(*z) ){
e = e*10 + (*z - '0');
z++;
z+=incr;
}
}
do_atof_calc:
/* adjust exponent by d, and update sign */
e = (e*esign) + d;
if( e<0 ) {
@@ -411,10 +382,10 @@ int sqlite3AtoF(const char *z, double *pResult){
/* store the result */
*pResult = result;
/* return number of characters used */
return (int)(z - zBegin);
/* return true if the whole input was consumed and its last character is a digit */
return z>=zEnd && sqlite3Isdigit(z[-incr]);
#else
return sqlite3Atoi64(z, pResult);
return sqlite3Atoi64(z, pResult, length, enc);
#endif /* SQLITE_OMIT_FLOATING_POINT */
}
@@ -427,15 +398,20 @@ int sqlite3AtoF(const char *z, double *pResult){
** in the values of the last digit if the only difference is in the
** last digit. So, for example,
**
** compare2pow63("9223372036854775800")
** compare2pow63("9223372036854775800", 1)
**
** will return -8.
*/
static int compare2pow63(const char *zNum){
int c;
c = memcmp(zNum,"922337203685477580",18)*10;
static int compare2pow63(const char *zNum, int incr){
int c = 0;
int i;
/* 012345678901234567 */
const char *pow63 = "922337203685477580";
for(i=0; c==0 && i<18; i++){
c = (zNum[i*incr]-pow63[i])*10;
}
if( c==0 ){
c = zNum[18] - '8';
c = zNum[18*incr] - '8';
testcase( c==(-1) );
testcase( c==0 );
testcase( c==(+1) );
@@ -450,46 +426,52 @@ static int compare2pow63(const char *zNum){
** or is an integer that is too large to be expressed with 64 bits,
** then return false.
**
** When this routine was originally written it dealt with only
** 32-bit numbers. At that time, it was much faster than the
** atoi() library routine in RedHat 7.2.
** length is the number of bytes in the string (bytes, not characters).
** The string is not necessarily zero-terminated. The encoding is
** given by enc.
*/
int sqlite3Atoi64(const char *zNum, i64 *pNum){
int sqlite3Atoi64(const char *zNum, i64 *pNum, int length, u8 enc){
int incr = (enc==SQLITE_UTF8?1:2);
i64 v = 0;
int neg;
int i, c;
int neg = 0;
int i;
int c = 0;
const char *zStart;
while( sqlite3Isspace(*zNum) ) zNum++;
const char *zEnd = zNum + length;
if( enc==SQLITE_UTF16BE ) zNum++;
while( zNum<zEnd && sqlite3Isspace(*zNum) ) zNum+=incr;
if( zNum>=zEnd ) goto do_atoi_calc;
if( *zNum=='-' ){
neg = 1;
zNum++;
zNum+=incr;
}else if( *zNum=='+' ){
neg = 0;
zNum++;
zNum+=incr;
}else{
neg = 0;
}
do_atoi_calc:
zStart = zNum;
while( zNum[0]=='0' ){ zNum++; } /* Skip over leading zeros. Ticket #2454 */
for(i=0; (c=zNum[i])>='0' && c<='9'; i++){
while( zNum<zEnd && zNum[0]=='0' ){ zNum+=incr; } /* Skip leading zeros. */
for(i=0; &zNum[i]<zEnd && (c=zNum[i])>='0' && c<='9'; i+=incr){
v = v*10 + c - '0';
}
*pNum = neg ? -v : v;
testcase( i==18 );
testcase( i==19 );
testcase( i==20 );
if( c!=0 || (i==0 && zStart==zNum) || i>19 ){
if( (c!=0 && &zNum[i]<zEnd) || (i==0 && zStart==zNum) || i>19*incr ){
/* zNum is empty or contains non-numeric text or is longer
** than 19 digits (thus guaranteeing that it is too large) */
return 0;
}else if( i<19 ){
}else if( i<19*incr ){
/* Less than 19 digits, so we know that it fits in 64 bits */
return 1;
}else{
/* 19-digit numbers must be no larger than 9223372036854775807 if positive
** or 9223372036854775808 if negative. Note that 9223372036854775808
** is 2^63. */
return compare2pow63(zNum)<neg;
return compare2pow63(zNum, incr)<neg;
}
}
@@ -532,7 +514,7 @@ int sqlite3FitsIn64Bits(const char *zNum, int negFlag){
return 0;
}else{
/* Compare against 2^63. */
return compare2pow63(zNum)<neg;
return compare2pow63(zNum, 1)<neg;
}
}