1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00

Merge pull request #1677 from tntnatbry/MCOL-4177-2

MCOL-4177 Add support for bulk insertion for wide decimals.
This commit is contained in:
Roman Nozdrin 2020-12-18 12:37:23 +03:00 committed by GitHub
commit bfe90be3db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 205 additions and 299 deletions

View File

@ -57,7 +57,9 @@ using namespace dataconvert;
namespace datatypes
{
int128_t SystemCatalog::TypeAttributesStd::decimal128FromString(const std::string& value) const
int128_t
SystemCatalog::TypeAttributesStd::decimal128FromString(
const std::string& value, bool *saturate) const
{
int128_t result = 0;
bool pushWarning = false;
@ -67,7 +69,8 @@ int128_t SystemCatalog::TypeAttributesStd::decimal128FromString(const std::strin
*this,
pushWarning,
noRoundup,
result);
result,
saturate);
return result;
}

View File

@ -167,10 +167,15 @@ public:
scale(0),
precision(-1)
{}
/**
@brief Constructs the attributes from explicit values.
@param w value stored in colWidth
@param s value stored in scale
@param p value stored in precision
*/
TypeAttributesStd(int32_t w, int32_t s, int32_t p)
:colWidth(w),
scale(s),
precision(p)
{}
/**
@brief Convenience method to get int128 from a std::string.
*/
int128_t decimal128FromString(const std::string& value) const;
int128_t decimal128FromString(const std::string& value, bool *saturate = 0) const;
/**
@brief The method sets the legacy scale and precision of a wide decimal

View File

@ -188,6 +188,20 @@ public:
class WriteBatchFieldMariaDB: public WriteBatchField
{
// Maximum number of decimal digits that can be represented in 4 bytes
static const int DIG_PER_DEC = 9;
// Number of bytes needed for a leftover group of 0..DIG_PER_DEC digits.
// See strings/decimal.c
const int dig2bytes[DIG_PER_DEC+1]={0, 1, 1, 2, 2, 3, 3, 4, 4, 4};
// Returns the number of bytes required to store a given number
// of decimal digits: 4 bytes for every full group of DIG_PER_DEC digits
// plus dig2bytes[] bytes for the digits that do not fill a group.
// A non-positive digit count yields 0. Without this guard a negative
// count makes `digits % DIG_PER_DEC` negative and reads dig2bytes[]
// out of bounds.
int numDecimalBytes(int digits)
{
    if (digits <= 0)
        return 0;

    const int fullGroups = digits / DIG_PER_DEC;
    const int leftoverDigits = digits % DIG_PER_DEC;
    return (fullGroups * 4) + dig2bytes[leftoverDigits];
}
public:
Field *m_field;
const CalpontSystemCatalog::ColType &m_type;
@ -539,264 +553,28 @@ public:
size_t ColWriteBatchXDecimal(const uchar *buf, bool nullVal, ColBatchWriter &ci) override
{
uint bytesBefore = 1;
uint totalBytes = 9;
switch (m_type.precision)
{
case 18:
case 17:
case 16:
{
totalBytes = 8;
break;
}
case 15:
case 14:
{
totalBytes = 7;
break;
}
case 13:
case 12:
{
totalBytes = 6;
break;
}
case 11:
{
totalBytes = 5;
break;
}
case 10:
{
totalBytes = 5;
break;
}
case 9:
case 8:
case 7:
{
totalBytes = 4;
break;
}
case 6:
case 5:
{
totalBytes = 3;
break;
}
case 4:
case 3:
{
totalBytes = 2;
break;
}
case 2:
case 1:
{
totalBytes = 1;
break;
}
default:
break;
}
switch (m_type.scale)
{
case 0:
{
bytesBefore = totalBytes;
break;
}
case 1: //1 byte for digits after decimal point
{
if ((m_type.precision != 16) && (m_type.precision != 14)
&& (m_type.precision != 12) && (m_type.precision != 10)
&& (m_type.precision != 7) && (m_type.precision != 5)
&& (m_type.precision != 3) && (m_type.precision != 1))
totalBytes++;
bytesBefore = totalBytes - 1;
break;
}
case 2: //1 byte for digits after decimal point
{
if ((m_type.precision == 18) || (m_type.precision == 9))
totalBytes++;
bytesBefore = totalBytes - 1;
break;
}
case 3: //2 bytes for digits after decimal point
{
if ((m_type.precision != 16) && (m_type.precision != 14)
&& (m_type.precision != 12) && (m_type.precision != 7)
&& (m_type.precision != 5) && (m_type.precision != 3))
totalBytes++;
bytesBefore = totalBytes - 2;
break;
}
case 4:
{
if ((m_type.precision == 18) || (m_type.precision == 11)
|| (m_type.precision == 9))
totalBytes++;
bytesBefore = totalBytes - 2;
break;
}
case 5:
{
if ((m_type.precision != 16) && (m_type.precision != 14)
&& (m_type.precision != 7) && (m_type.precision != 5))
totalBytes++;
bytesBefore = totalBytes - 3;
break;
}
case 6:
{
if ((m_type.precision == 18) || (m_type.precision == 13)
|| (m_type.precision == 11) || (m_type.precision == 9))
totalBytes++;
bytesBefore = totalBytes - 3;
break;
}
case 7:
{
if ((m_type.precision != 16) && (m_type.precision != 7))
totalBytes++;
bytesBefore = totalBytes - 4;
break;
}
case 8:
{
if ((m_type.precision == 18) || (m_type.precision == 15)
|| (m_type.precision == 13) || (m_type.precision == 11)
|| (m_type.precision == 9))
totalBytes++;
bytesBefore = totalBytes - 4;;
break;
}
case 9:
{
bytesBefore = totalBytes - 4;;
break;
}
case 10:
{
if ((m_type.precision != 16) && (m_type.precision != 14)
&& (m_type.precision != 12) && (m_type.precision != 10))
totalBytes++;
bytesBefore = totalBytes - 5;;
break;
}
case 11:
{
if (m_type.precision == 18)
totalBytes++;
bytesBefore = totalBytes - 5;
break;
}
case 12:
{
if ((m_type.precision != 16) && (m_type.precision != 14)
&& (m_type.precision != 12))
totalBytes++;
bytesBefore = totalBytes - 6;
break;
}
case 13:
{
if (m_type.precision == 18)
totalBytes++;
bytesBefore = totalBytes - 6;
break;
}
case 14:
{
if ((m_type.precision != 16) && (m_type.precision != 14))
totalBytes++;
bytesBefore = totalBytes - 7;
break;
}
case 15:
{
if (m_type.precision == 18)
totalBytes++;
bytesBefore = totalBytes - 7;
break;
}
case 16:
{
if (m_type.precision != 16)
totalBytes++;
bytesBefore = totalBytes - 8;
break;
}
case 17:
{
if (m_type.precision == 18)
totalBytes++;
bytesBefore = totalBytes - 8;
break;
}
case 18:
{
bytesBefore = totalBytes - 8;
break;
}
default:
break;
}
uint bytesBefore = numDecimalBytes(m_type.precision - m_type.scale);
uint totalBytes = bytesBefore + numDecimalBytes(m_type.scale);
if (nullVal && (m_type.constraintType != CalpontSystemCatalog::NOTNULL_CONSTRAINT))
{
fprintf(ci.filePtr(), "%c", ci.delimiter());
//printf("|");
}
else if (m_type.precision > datatypes::INT64MAXPRECISION)
{
// TODO MCOL-641 The below else block for narrow decimal
// i.e. (m_type.precision <= datatypes::INT64MAXPRECISION)
// converts the decimal binary representation in buf directly
// to a string, while here, the my_decimal ctor first calls
// bin2decimal() on buf, and then we construct the string from
// the my_decimal. This approach might be a bit slower than the
// narrow decimal approach.
my_decimal dec(buf, m_type.precision, m_type.scale);
String str;
dec.to_string(&str);
fprintf(ci.filePtr(), "%s%c", str.c_ptr(), ci.delimiter());
}
else
{
uint32_t mask [5] = {0, 0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF};

View File

@ -111,7 +111,7 @@ void number_int_value(const string& data,
const datatypes::SystemCatalog::TypeAttributesStd& ct,
bool& pushwarning,
bool noRoundup,
T& intVal)
T& intVal, bool* saturate)
{
// copy of the original input
string valStr(data);
@ -304,11 +304,17 @@ void number_int_value(const string& data,
{
intVal = MIN_TINYINT;
pushwarning = true;
if (saturate)
*saturate = true;
}
else if (intVal > MAX_TINYINT)
{
intVal = MAX_TINYINT;
pushwarning = true;
if (saturate)
*saturate = true;
}
break;
@ -318,11 +324,17 @@ void number_int_value(const string& data,
{
intVal = MIN_SMALLINT;
pushwarning = true;
if (saturate)
*saturate = true;
}
else if (intVal > MAX_SMALLINT)
{
intVal = MAX_SMALLINT;
pushwarning = true;
if (saturate)
*saturate = true;
}
break;
@ -332,11 +344,17 @@ void number_int_value(const string& data,
{
intVal = MIN_MEDINT;
pushwarning = true;
if (saturate)
*saturate = true;
}
else if (intVal > MAX_MEDINT)
{
intVal = MAX_MEDINT;
pushwarning = true;
if (saturate)
*saturate = true;
}
break;
@ -346,11 +364,17 @@ void number_int_value(const string& data,
{
intVal = MIN_INT;
pushwarning = true;
if (saturate)
*saturate = true;
}
else if (intVal > MAX_INT)
{
intVal = MAX_INT;
pushwarning = true;
if (saturate)
*saturate = true;
}
break;
@ -360,6 +384,9 @@ void number_int_value(const string& data,
{
intVal = MIN_BIGINT;
pushwarning = true;
if (saturate)
*saturate = true;
}
break;
@ -374,6 +401,9 @@ void number_int_value(const string& data,
{
intVal = tmp + 2;
pushwarning = true;
if (saturate)
*saturate = true;
}
}
else if (ct.colWidth == 8)
@ -382,6 +412,9 @@ void number_int_value(const string& data,
{
intVal = MIN_BIGINT;
pushwarning = true;
if (saturate)
*saturate = true;
}
}
else if (ct.colWidth == 4)
@ -390,11 +423,17 @@ void number_int_value(const string& data,
{
intVal = MIN_INT;
pushwarning = true;
if (saturate)
*saturate = true;
}
else if (intVal > MAX_INT)
{
intVal = MAX_INT;
pushwarning = true;
if (saturate)
*saturate = true;
}
}
else if (ct.colWidth == 2)
@ -403,11 +442,17 @@ void number_int_value(const string& data,
{
intVal = MIN_SMALLINT;
pushwarning = true;
if (saturate)
*saturate = true;
}
else if (intVal > MAX_SMALLINT)
{
intVal = MAX_SMALLINT;
pushwarning = true;
if (saturate)
*saturate = true;
}
}
else if (ct.colWidth == 1)
@ -416,11 +461,17 @@ void number_int_value(const string& data,
{
intVal = MIN_TINYINT;
pushwarning = true;
if (saturate)
*saturate = true;
}
else if (intVal > MAX_TINYINT)
{
intVal = MAX_TINYINT;
pushwarning = true;
if (saturate)
*saturate = true;
}
}
@ -454,11 +505,17 @@ void number_int_value(const string& data,
{
intVal = rangeUp;
pushwarning = true;
if (saturate)
*saturate = true;
}
else if (intVal < rangeLow)
{
intVal = rangeLow;
pushwarning = true;
if (saturate)
*saturate = true;
}
}
}
@ -470,7 +527,7 @@ void number_int_value<int64_t>(const std::string& data,
const datatypes::SystemCatalog::TypeAttributesStd& ct,
bool& pushwarning,
bool noRoundup,
int64_t& intVal);
int64_t& intVal, bool* saturate);
template
void number_int_value<int128_t>(const std::string& data,
@ -478,7 +535,7 @@ void number_int_value<int128_t>(const std::string& data,
const datatypes::SystemCatalog::TypeAttributesStd& ct,
bool& pushwarning,
bool noRoundup,
int128_t& intVal);
int128_t& intVal, bool* saturate);
uint64_t number_uint_value(const string& data,
cscDataType typeCode,

View File

@ -882,7 +882,7 @@ void number_int_value(const std::string& data,
const datatypes::SystemCatalog::TypeAttributesStd &ct,
bool& pushwarning,
bool noRoundup,
T& intVal);
T& intVal, bool* saturate = 0);
uint64_t number_uint_value(const string& data,
cscDataType typeCode,

View File

@ -940,9 +940,7 @@ void BulkLoadBuffer::convert(char* field, int fieldLength,
// BIG INT
//----------------------------------------------------------------------
case WriteEngine::WR_LONGLONG:
case WriteEngine::WR_BINARY:
{
// TODO MCOL-641 Add full support here.
bool bSatVal = false;
if ( column.dataType != CalpontSystemCatalog::DATETIME &&
@ -987,18 +985,9 @@ void BulkLoadBuffer::convert(char* field, int fieldLength,
if ( (column.dataType == CalpontSystemCatalog::DECIMAL) ||
(column.dataType == CalpontSystemCatalog::UDECIMAL))
{
if (LIKELY(width == datatypes::MAXDECIMALWIDTH))
{
bool saturate = false;
bigllVal = dataconvert::string_to_ll<int128_t>(string(field), saturate);
// TODO MCOL-641 check saturate
}
else if (width <= 8)
{
// errno is initialized and set in convertDecimalString
llVal = Convertor::convertDecimalString(
field, fieldLength, column.scale );
}
// errno is initialized and set in convertDecimalString
llVal = Convertor::convertDecimalString(
field, fieldLength, column.scale );
}
else
{
@ -1024,31 +1013,17 @@ void BulkLoadBuffer::convert(char* field, int fieldLength,
bSatVal = true;
}
if (bSatVal)
bufStats.satCount++;
// Update min/max range
if (width <= 8)
{
if (llVal < bufStats.minBufferVal)
bufStats.minBufferVal = llVal;
if (llVal < bufStats.minBufferVal)
bufStats.minBufferVal = llVal;
if (llVal > bufStats.maxBufferVal)
bufStats.maxBufferVal = llVal;
if (llVal > bufStats.maxBufferVal)
bufStats.maxBufferVal = llVal;
pVal = &llVal;
}
else
{
if (bigllVal < bufStats.bigMinBufferVal)
bufStats.bigMinBufferVal = bigllVal;
if (bigllVal > bufStats.bigMaxBufferVal)
bufStats.bigMaxBufferVal = bigllVal;
pVal = &bigllVal;
}
pVal = &llVal;
}
else if (column.dataType == CalpontSystemCatalog::TIME)
{
@ -1212,6 +1187,75 @@ void BulkLoadBuffer::convert(char* field, int fieldLength,
break;
}
//----------------------------------------------------------------------
// WIDE DECIMAL
//----------------------------------------------------------------------
case WriteEngine::WR_BINARY:
{
// Produces the wide decimal value in bigllVal and points pVal at it.
// bSatVal is only ever set by the text conversion further down.
bool bSatVal = false;
// NULL input: use the column default, the NULL marker, or the next
// auto-increment value, depending on the column settings.
if (nullFlag)
{
if (!column.autoIncFlag)
{
if (column.fWithDefault)
{
bigllVal = column.fDefaultWideDecimal;
// fall through to update saturation and min/max
}
else
{
// No default: store the NULL marker and leave the case right away,
// so satCount and the big min/max range are not updated for it.
bigllVal = datatypes::Decimal128Null;
pVal = &bigllVal;
break;
}
}
else
{
// TODO MCOL-641 Add support for int128_t version of
// fAutoIncNextValue
bigllVal = fAutoIncNextValue++;
}
}
else
{
if (fImportDataMode != IMPORT_DATA_TEXT)
{
// Non-text import: the field bytes are copied as-is into bigllVal;
// bSatVal is left untouched on this path.
// NOTE(review): assumes field holds at least sizeof(bigllVal) bytes
// - confirm against the caller.
memcpy(&bigllVal, field, sizeof(bigllVal));
}
else
{
// A field accepted by isTrueWord() is rewritten in place to "1"
// before the numeric conversion.
if (isTrueWord(const_cast<const char*>(field), fieldLength))
{
strcpy(field, "1");
fieldLength = 1;
}
// Receives number_int_value()'s pushwarning output; not read afterwards.
bool dummy = false;
// Value saturation to 9999... or -9999... is handled by
// number_int_value(), and the bSatVal flag is set to true
dataconvert::number_int_value(string(field), column.dataType,
datatypes::SystemCatalog::TypeAttributesStd(
column.width, column.scale, column.precision),
dummy, false, bigllVal, &bSatVal);
}
}
if (bSatVal)
bufStats.satCount++;
// Update min/max range
if (bigllVal < bufStats.bigMinBufferVal)
bufStats.bigMinBufferVal = bigllVal;
if (bigllVal > bufStats.bigMaxBufferVal)
bufStats.bigMaxBufferVal = bigllVal;
pVal = &bigllVal;
break;
}
//----------------------------------------------------------------------
// UNSIGNED BIG INT
//----------------------------------------------------------------------
@ -3395,6 +3439,14 @@ bool BulkLoadBuffer::isBinaryFieldNull(void* val,
break;
}
case WriteEngine::WR_BINARY:
{
// The value is read through val as an int128_t and is flagged as NULL
// when it equals the datatypes::Decimal128Null marker.
if ((*((int128_t*)val)) == datatypes::Decimal128Null)
isNullFlag = true;
break;
}
default:
{
break;

View File

@ -204,6 +204,7 @@ ColumnInfo::ColumnInfo(Log* logger,
case WriteEngine::WR_ULONGLONG:
case WriteEngine::WR_UMEDINT:
case WriteEngine::WR_UINT:
case WriteEngine::WR_BINARY:
default:
{
fColExtInf = new ColExtInf(column.mapOid, logger);

View File

@ -367,6 +367,7 @@ struct JobColumn /** @brief Job Column Structure */
long long fDefaultInt; /** @brief Integer column default */
unsigned long long fDefaultUInt; /** @brief UnsignedInt col default*/
double fDefaultDbl; /** @brief Dbl/Flt column default */
int128_t fDefaultWideDecimal; /** @brief Wide decimal column default */
std::string fDefaultChr; /** @brief Char column default */
JobColumn() : mapOid(0), dataType(execplan::CalpontSystemCatalog::INT), weType(WR_INT),
typeName("integer"), emptyVal(0),
@ -376,7 +377,8 @@ struct JobColumn /** @brief Job Column Structure */
compressionType(0), autoIncFlag(false),
fMinIntSat(0), fMaxIntSat(0),
fMinDblSat(0), fMaxDblSat(0), fWithDefault(false),
fDefaultInt(0), fDefaultUInt(0), fDefaultDbl(0.0)
fDefaultInt(0), fDefaultUInt(0), fDefaultDbl(0.0),
fDefaultWideDecimal(0)
{ }
};

View File

@ -1087,13 +1087,21 @@ void XMLJob::fillInXMLDataNotNullDefault(
case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL:
{
col.fDefaultInt = Convertor::convertDecimalString(
col_defaultValue.c_str(),
col_defaultValue.length(),
colType.scale);
if (LIKELY(colType.colWidth == datatypes::MAXDECIMALWIDTH))
{
col.fDefaultWideDecimal = colType.decimal128FromString(
col_defaultValue, &bDefaultConvertError);
}
else
{
col.fDefaultInt = Convertor::convertDecimalString(
col_defaultValue.c_str(),
col_defaultValue.length(),
colType.scale);
if (errno == ERANGE)
bDefaultConvertError = true;
if (errno == ERANGE)
bDefaultConvertError = true;
}
break;
}