1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-641 This commit changes NULL and EMPTY values.

It also contains the refactored DataConvert::decimalToString().

Row::toString UT is finished.
This commit is contained in:
Roman Nozdrin
2020-02-21 17:47:04 +00:00
parent de85e21c38
commit c23ead2703
7 changed files with 194 additions and 47 deletions

View File

@ -414,15 +414,15 @@ template<>
inline bool isNullVal<16>(uint8_t type, const uint8_t* ival) // For BINARY inline bool isNullVal<16>(uint8_t type, const uint8_t* ival) // For BINARY
{ {
const uint64_t* val = reinterpret_cast<const uint64_t*>(ival); const uint64_t* val = reinterpret_cast<const uint64_t*>(ival);
return ((val[0] == joblist::BINARYEMPTYROW) && (val[1] == joblist::BINARYNULL)); return ((val[0] == joblist::BINARYNULL) && (val[1] == joblist::BINARYEMPTYROW));
} }
template<> template<>
inline bool isNullVal<32>(uint8_t type, const uint8_t* ival) // For BINARY inline bool isNullVal<32>(uint8_t type, const uint8_t* ival) // For BINARY
{ {
const uint64_t* val = reinterpret_cast<const uint64_t*>(ival); const uint64_t* val = reinterpret_cast<const uint64_t*>(ival);
return ((val[0] == joblist::BINARYEMPTYROW) && (val[1] == joblist::BINARYEMPTYROW) return ((val[0] == joblist::BINARYNULL) && (val[1] == joblist::BINARYEMPTYROW)
&& (val[2] == joblist::BINARYEMPTYROW) && (val[3] == joblist::BINARYNULL)); && (val[2] == joblist::BINARYEMPTYROW) && (val[3] == joblist::BINARYEMPTYROW));
} }
template<> template<>

View File

@ -1067,8 +1067,6 @@ void ColumnCommand::getEmptyRowValue(const CSCDataType dataType,
ptr[1] = joblist::BINARYEMPTYROW; ptr[1] = joblist::BINARYEMPTYROW;
} }
void ColumnCommand::getLBIDList(uint32_t loopCount, vector<int64_t>* lbids) void ColumnCommand::getLBIDList(uint32_t loopCount, vector<int64_t>* lbids)
{ {
int64_t firstLBID = lbid, lastLBID = firstLBID + (loopCount * colType.colWidth) - 1, i; int64_t firstLBID = lbid, lastLBID = firstLBID + (loopCount * colType.colWidth) - 1, i;

View File

@ -69,6 +69,18 @@ namespace utils
return 16; return 16;
} }
} }
// Maps a column width in bytes to the decimal precision (number of digits)
// used for that width: 38 for 16-byte values, 65 for every other width.
inline uint8_t precisionByWidth(unsigned w)
{
    if (w == 16)
    {
        return 38;
    }

    // In case we will support decimals that span 32 bytes.
    return 65;
}
} }

View File

@ -28,6 +28,7 @@
#include <ctime> #include <ctime>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <type_traits>
using namespace std; using namespace std;
#include <boost/algorithm/string/case_conv.hpp> #include <boost/algorithm/string/case_conv.hpp>
#include <boost/algorithm/string.hpp> #include <boost/algorithm/string.hpp>
@ -1164,7 +1165,110 @@ bool stringToTimestampStruct(const string& data, TimeStamp& timeStamp, const str
} }
// WIP MCOL-641 // WIP MCOL-641
// Check for overflows with buflen // Second DT is for POD representation.
// Writes the integer part of a scaled decimal into p as a NUL-terminated
// string of decimal digits.
//
// dec    - value to print; expected to be non-negative (toString1 strips the
//          sign before calling this).
// p      - destination buffer.
// buflen - number of bytes available at p, including the terminating NUL.
// scale  - number of least significant decimal digits dropped before printing.
//
// Returns the number of characters written, not counting the terminating NUL.
// Throws QueryDataExcept(formatErr) when the digits plus the NUL do not fit
// into buflen; the write itself is bounded by buflen.
template <typename T>
size_t DataConvert::writeIntPart(T* dec, char* p,
                                 const uint16_t buflen,
                                 const uint8_t scale)
{
    T intPart = *dec;

    // Drop the fractional digits.
    for (unsigned i = 0; i < scale; i++)
        intPart /= 10;

    // 10^19 is the largest power of ten that fits into uint64_t, so the value
    // is split into groups of at most 19 decimal digits each.
    const uint64_t div = 10000000000000000000ULL;

    // Every group is < 10^19, so a plain value conversion is lossless. This
    // avoids reading T through a uint64_t pointer, which depended on byte
    // order and read out of bounds for T narrower than 8 bytes.
    const unsigned long long low = static_cast<unsigned long long>(intPart % div);
    intPart /= div;
    const unsigned long long mid = static_cast<unsigned long long>(intPart % div);
    intPart /= div;
    const unsigned long long high = static_cast<unsigned long long>(intPart);

    // Only the most significant non-zero group is printed without padding;
    // the groups below it are zero-padded to 19 digits.
    int written = 0;

    if (high != 0)
    {
        written = snprintf(p, buflen, "%llu%019llu%019llu", high, mid, low);
    }
    else if (mid != 0)
    {
        written = snprintf(p, buflen, "%llu%019llu", mid, low);
    }
    else
    {
        written = snprintf(p, buflen, "%llu", low);
    }

    // snprintf returns the length the full output needs, so a value >= buflen
    // means the output was truncated.
    if (written < 0 || written >= static_cast<int>(buflen))
    {
        throw QueryDataExcept("toString() char buffer overflow.", formatErr);
    }

    return static_cast<size_t>(written);
}
// Writes the fractional digits of a scaled decimal into p.
//
// The fraction is taken as *dec modulo 10^scale and printed through
// writeIntPart() as a plain integer, so leading zeros of the fraction are not
// emitted. A scale of 0 still uses a divisor of 10.
//
// Returns whatever writeIntPart() returns for the fraction; buflen is
// forwarded to it unchanged.
template<typename T>
size_t DataConvert::writeFractionalPart(T* dec, char* p,
                                        const uint16_t buflen,
                                        const uint8_t scale)
{
    T powerOfTen = 10;

    // One multiplication for every digit of scale beyond the first.
    for (size_t remaining = scale; remaining > 1; --remaining)
    {
        powerOfTen *= 10;
    }

    T fraction = *dec % powerOfTen;
    return writeIntPart(&fraction, p, buflen, 0);
}
// WIP MCOL-641
// Limit this to Decimal only
// Replace decimalToString with this one
//
// Formats the scaled decimal *dec as "[-]<integer part>[.<fraction>]" into p.
//
// dec    - value to format; it is only read, the caller's value is not changed.
// p      - destination buffer.
// buflen - total number of bytes available at p, including the terminating NUL.
// scale  - number of decimal digits after the point; 0 prints no point.
//
// Throws QueryDataExcept(formatErr) when the text does not fit into buflen.
template<typename T>
void DataConvert::toString1(T* dec, char *p, const uint16_t buflen,
                            const uint8_t scale)
{
    char* const bufStart = p;

    // Work on a copy so that formatting does not negate the caller's value.
    T value = *dec;

    if (value < static_cast<T>(0))
    {
        // The sign needs one byte and at least one more for the NUL.
        if (buflen < 2)
        {
            throw QueryDataExcept("toString() char buffer overflow.", formatErr);
        }

        *p++ = '-';
        // NOTE(review): negating the minimum value of a signed T overflows;
        // confirm that such a value can never reach this function.
        value *= -1;
    }

    // Hand the helpers only the space that is left after the sign.
    size_t used = p - bufStart;
    p += writeIntPart<T>(&value, p, static_cast<uint16_t>(buflen - used), scale);

    if (scale)
    {
        used = p - bufStart;

        // '.', scale digits and the terminating NUL must all fit.
        if (used + 1u + scale >= buflen)
        {
            throw QueryDataExcept("toString() char buffer overflow.", formatErr);
        }

        *p++ = '.';
        size_t fracLen = writeFractionalPart(&value, p,
                                             static_cast<uint16_t>(buflen - used - 1),
                                             scale);

        // The fraction may come back without its leading zeros (42 for
        // 0.042); shift it right and fill the gap with '0'.
        if (fracLen < scale)
        {
            const size_t pad = scale - fracLen;
            memmove(p + pad, p, fracLen + 1); // +1 keeps the terminating NUL
            memset(p, '0', pad);
            fracLen = scale;
        }

        p += fracLen;
    }

    if (buflen <= p - bufStart)
    {
        throw QueryDataExcept("toString() char buffer overflow.", formatErr);
    }
}
template<typename T> template<typename T>
void DataConvert::toString(T* dec, char *p, size_t buflen) void DataConvert::toString(T* dec, char *p, size_t buflen)
{ {
@ -1206,6 +1310,7 @@ void DataConvert::toString(T* dec, char *p, size_t buflen)
if (buflen <= p-original_p) if (buflen <= p-original_p)
std::cout << "DataConvert::toString char buffer overflow" << std::endl; std::cout << "DataConvert::toString char buffer overflow" << std::endl;
} }
// WIP MCOL-641 // WIP MCOL-641
// Template this // Template this
// result must be calloc-ed // result must be calloc-ed
@ -1239,13 +1344,20 @@ void atoi128(const std::string& arg, uint128_t& res)
} }
// WIP MCOL-641 // WIP MCOL-641
// Doesn't work for -0.042
template <typename T> template <typename T>
void DataConvert::decimalToString(T* valuePtr, void DataConvert::decimalToString(T* valuePtr,
uint8_t scale, uint8_t scale,
char* buf, char* buf,
unsigned int buflen, unsigned int buflen,
cscDataType colDataType) cscDataType colDataType) // We don't need the last one
{ {
if (*valuePtr < static_cast<T>(0))
{
*buf++ = '-';
*valuePtr *= -1;
}
toString<T>(valuePtr, buf, buflen); toString<T>(valuePtr, buf, buflen);
// Biggest ColumnStore supports is DECIMAL(38,x), or 38 total digits+dp+sign for column // Biggest ColumnStore supports is DECIMAL(38,x), or 38 total digits+dp+sign for column

View File

@ -1034,8 +1034,19 @@ public:
template <typename T> template <typename T>
EXPORT static void decimalToString(T* value, uint8_t scale, char* buf, unsigned int buflen, cscDataType colDataType); EXPORT static void decimalToString(T* value, uint8_t scale, char* buf, unsigned int buflen, cscDataType colDataType);
template<typename T>
static void toString(T* dec, char *p, size_t buflen);
template <typename T> template <typename T>
EXPORT static void toString(T* dec, char *p, size_t buflen); EXPORT static void toString1(T* dec, char* p, const uint16_t buflen,
const uint8_t scale = 0);
template <typename T>
static size_t writeIntPart(T* dec, char* p, const uint16_t buflen,
const uint8_t scale);
template <typename T>
static size_t writeFractionalPart(T* dec, char* p,
const uint16_t buflen, const uint8_t scale);
static inline void int128Max(int128_t& i) static inline void int128Max(int128_t& i)
{ {

View File

@ -1,8 +1,10 @@
#include <gtest/gtest.h> // googletest header file #include <gtest/gtest.h> // googletest header file
#include <iostream>
#include "rowgroup.h" #include "rowgroup.h"
#include "columnwidth.h" #include "columnwidth.h"
#include "joblisttypes.h" #include "joblisttypes.h"
#include "iostream" #include "dataconvert.h"
#define WIDE_DEC_PRECISION 38U #define WIDE_DEC_PRECISION 38U
#define INITIAL_ROW_OFFSET 2 #define INITIAL_ROW_OFFSET 2
@ -60,7 +62,6 @@ class RowDecimalTest : public ::testing::Test {
false //useStringTable false //useStringTable
); );
//std::cout << inRG.toString() << std::endl;
rg = inRG; rg = inRG;
rgD.reinit(rg); rgD.reinit(rg);
rg.setData(&rgD); rg.setData(&rgD);
@ -74,9 +75,14 @@ class RowDecimalTest : public ::testing::Test {
uint64_t* uint128_pod = reinterpret_cast<uint64_t*>(&nullValue); uint64_t* uint128_pod = reinterpret_cast<uint64_t*>(&nullValue);
uint128_pod[0] = joblist::BINARYEMPTYROW; uint128_pod[0] = joblist::BINARYEMPTYROW;
uint128_pod[1] = joblist::BINARYNULL; uint128_pod[1] = joblist::BINARYNULL;
bigValue = 42*0xFFFFFFFFFFFFFFFFLL; bigValue = -static_cast<int128_t>(0xFFFFFFFF)*0xFFFFFFFFFFFFFFFF;
//char buf[utils::precisionByWidth(16)+3];
sValueVector.push_back(nullValue); //memset(&buf[0], 0, sizeof(buf));
//int scale1 = 3;
//dataconvert::DataConvert::decimalToString(&bigValue, scale1, buf,
// utils::precisionByWidth(sizeof(bigValue))+3,types[0]);
//std::cout << buf << std::endl;
sValueVector.push_back(nullValue-2);
sValueVector.push_back(-42); sValueVector.push_back(-42);
sValueVector.push_back(bigValue); sValueVector.push_back(bigValue);
sValueVector.push_back(0); sValueVector.push_back(0);
@ -106,15 +112,15 @@ class RowDecimalTest : public ::testing::Test {
s32ValueVector.push_back(0x81); s32ValueVector.push_back(0x81);
s32ValueVector.push_back(joblist::INTNULL-1); s32ValueVector.push_back(joblist::INTNULL-1);
s64ValueVector.push_back(joblist::INTNULL); s64ValueVector.push_back(joblist::BIGINTNULL);
s64ValueVector.push_back(-0x79); s64ValueVector.push_back(-0x79);
s64ValueVector.push_back(0); s64ValueVector.push_back(0);
s64ValueVector.push_back(0x81); s64ValueVector.push_back(0x81);
s64ValueVector.push_back(joblist::INTNULL-1); s64ValueVector.push_back(joblist::BIGINTNULL-1);
r.initToNull(); //r.initToNull();
r.nextRow(rowSize); //r.nextRow(rowSize);
for(size_t i = 1; i < sValueVector.size(); i++) { for(size_t i = 0; i < sValueVector.size(); i++) {
r.setBinaryField_offset(&sValueVector[i], r.setBinaryField_offset(&sValueVector[i],
sizeof(sValueVector[0]), offsets[0]); sizeof(sValueVector[0]), offsets[0]);
r.setBinaryField_offset(&uValueVector[i], r.setBinaryField_offset(&uValueVector[i],
@ -123,7 +129,6 @@ class RowDecimalTest : public ::testing::Test {
r.setIntField(s32ValueVector[i], 3); r.setIntField(s32ValueVector[i], 3);
r.setIntField(s16ValueVector[i], 4); r.setIntField(s16ValueVector[i], 4);
r.setIntField(s8ValueVector[i], 5); r.setIntField(s8ValueVector[i], 5);
//std::cout << r.toString() << std::endl;
r.nextRow(rowSize); r.nextRow(rowSize);
} }
rowCount = sValueVector.size(); rowCount = sValueVector.size();
@ -159,6 +164,7 @@ TEST_F(RowDecimalTest, NonNULLValuesCheck) {
TEST_F(RowDecimalTest, initToNullANDisNullValueValueCheck) { TEST_F(RowDecimalTest, initToNullANDisNullValueValueCheck) {
rg.getRow(0, &r); rg.getRow(0, &r);
r.initToNull();
EXPECT_TRUE(r.isNullValue(0)); EXPECT_TRUE(r.isNullValue(0));
EXPECT_TRUE(r.isNullValue(1)); EXPECT_TRUE(r.isNullValue(1));
EXPECT_TRUE(r.isNullValue(2)); EXPECT_TRUE(r.isNullValue(2));
@ -176,29 +182,37 @@ TEST_F(RowDecimalTest, getBinaryFieldCheck) {
for (size_t i = 0; i < sValueVector.size(); i++) { for (size_t i = 0; i < sValueVector.size(); i++) {
s128Value = r.getBinaryField<int128_t>(0); s128Value = r.getBinaryField<int128_t>(0);
EXPECT_EQ(*s128Value, sValueVector[i]); EXPECT_EQ(sValueVector[i], *s128Value);
u128Value = r.getBinaryField<uint128_t>(1); u128Value = r.getBinaryField<uint128_t>(1);
EXPECT_EQ(*u128Value, uValueVector[i]); EXPECT_EQ(uValueVector[i], *u128Value);
//EXPECT_EQ(r.getIntField(2),s64ValueVector[i]); //EXPECT_EQ(s64ValueVector[i], r.getIntField(2));
//EXPECT_EQ(r.getIntField(3),s32ValueVector[i]); //EXPECT_EQ(s32ValueVector[i],r.getIntField(3));
//EXPECT_EQ(r.getIntField(4),s16ValueVector[i]); //EXPECT_EQ(r.getIntField(4),s16ValueVector[i]);
//EXPECT_EQ(r.getIntField(5),s8ValueVector[i]); //EXPECT_EQ(r.getIntField(5),s8ValueVector[i]);
r.nextRow(rowSize); r.nextRow(rowSize);
} }
} }
// TBD Need to add asserts when toString will be finished
TEST_F(RowDecimalTest, toStringCheck) { TEST_F(RowDecimalTest, toStringCheck) {
std::string exemplar1("0: NULL NULL NULL NULL NULL NULL "); std::vector<std::string> exemplarVector;
exemplarVector.push_back(std::string("0: NULL NULL NULL NULL NULL NULL "));
exemplarVector.push_back(std::string("0: -42 42 -121 -121 -121 -121 "));
exemplarVector.push_back(std::string("0: -79228162495817593515539431425 -79228162495817593515539431425 0 0 0 0 "));
exemplarVector.push_back(std::string("0: 0 0 129 129 129 -127 "));
exemplarVector.push_back(std::string("0: -18446744073709551618 -18446744073709551618 9223372036854775807 2147483647 32767 127 "));
rg.getRow(0, &r); rg.getRow(0, &r);
EXPECT_EQ(r.toString(), exemplar1); r.initToNull();
r.nextRow(rowSize); for (auto &el: exemplarVector) {
std::cout << r.toString() << std::endl; EXPECT_EQ(el, r.toString());
r.nextRow(rowSize); r.nextRow(rowSize);
std::cout << r.toString() << std::endl; }
r.nextRow(rowSize);
std::cout << r.toString() << std::endl;
} }
//toString TEST_F(RowDecimalTest, toCSVCheck) {
//toCSV }
//applyMapping TEST_F(RowDecimalTest, applyMappingCheck) {
//equals }
TEST_F(RowDecimalTest, equalsCheck) {
}

View File

@ -642,8 +642,8 @@ string Row::toString() const
{ {
char *buf = (char*)alloca(precision[i] + 3); char *buf = (char*)alloca(precision[i] + 3);
// empty the buffer // empty the buffer
dataconvert::DataConvert::toString<int128_t>(getBinaryField<int128_t>(i), dataconvert::DataConvert::toString(getBinaryField<int128_t>(i),
buf, precision[i]+3); buf, precision[i]+3); //WIP scale[i]
os << buf << " "; os << buf << " ";
break; break;
} }
@ -851,8 +851,8 @@ void Row::initToNull()
case 16 : case 16 :
{ {
uint64_t *dec = reinterpret_cast<uint64_t*>(&data[offsets[i]]); uint64_t *dec = reinterpret_cast<uint64_t*>(&data[offsets[i]]);
dec[0] = joblist::BINARYEMPTYROW; dec[0] = joblist::BINARYNULL;
dec[1] = joblist::BINARYNULL; dec[1] = joblist::BINARYEMPTYROW;
break; break;
} }
default: default:
@ -882,8 +882,8 @@ void Row::initToNull()
case CalpontSystemCatalog::BINARY: case CalpontSystemCatalog::BINARY:
{ {
uint64_t *dec = reinterpret_cast<uint64_t*>(&data[offsets[i]]); uint64_t *dec = reinterpret_cast<uint64_t*>(&data[offsets[i]]);
dec[0] = joblist::BINARYEMPTYROW; dec[0] = joblist::BINARYNULL;
dec[1] = joblist::BINARYNULL; dec[1] = joblist::BINARYEMPTYROW;
} }
break; break;
@ -916,10 +916,10 @@ Row::isNullValue_offset<execplan::CalpontSystemCatalog::BINARY,32>(
uint32_t offset) const uint32_t offset) const
{ {
const int64_t *intPtr = reinterpret_cast<const int64_t*>(&data[offset]); const int64_t *intPtr = reinterpret_cast<const int64_t*>(&data[offset]);
return ((intPtr[0] == static_cast<int64_t>(joblist::BINARYEMPTYROW)) && return ((intPtr[0] == static_cast<int64_t>(joblist::BINARYNULL)) &&
(intPtr[1] == static_cast<int64_t>(joblist::BINARYEMPTYROW)) && (intPtr[1] == static_cast<int64_t>(joblist::BINARYEMPTYROW)) &&
(intPtr[2] == static_cast<int64_t>(joblist::BINARYEMPTYROW)) && (intPtr[2] == static_cast<int64_t>(joblist::BINARYEMPTYROW)) &&
(intPtr[3] == static_cast<int64_t>(joblist::BINARYNULL))); (intPtr[3] == static_cast<int64_t>(joblist::BINARYEMPTYROW)));
} }
template<> template<>
@ -928,8 +928,8 @@ Row::isNullValue_offset<execplan::CalpontSystemCatalog::BINARY,16>(
uint32_t offset) const uint32_t offset) const
{ {
const int64_t *intPtr = reinterpret_cast<const int64_t*>(&data[offset]); const int64_t *intPtr = reinterpret_cast<const int64_t*>(&data[offset]);
return ((intPtr[0] == static_cast<int64_t>(joblist::BINARYEMPTYROW)) return ((intPtr[0] == static_cast<int64_t>(joblist::BINARYNULL))
&& (intPtr[1] == static_cast<int64_t>(joblist::BINARYNULL))); && (intPtr[1] == static_cast<int64_t>(joblist::BINARYEMPTYROW)));
} }
template<> template<>
@ -938,8 +938,8 @@ Row::isNullValue_offset<execplan::CalpontSystemCatalog::DECIMAL,16>(
uint32_t offset) const uint32_t offset) const
{ {
const int64_t *intPtr = reinterpret_cast<const int64_t*>(&data[offset]); const int64_t *intPtr = reinterpret_cast<const int64_t*>(&data[offset]);
return ((intPtr[0] == static_cast<int64_t>(joblist::BINARYEMPTYROW)) return ((intPtr[0] == static_cast<int64_t>(joblist::BINARYNULL))
&& (intPtr[1] == static_cast<int64_t>(joblist::BINARYNULL))); && (intPtr[1] == static_cast<int64_t>(joblist::BINARYEMPTYROW)));
} }
template<> template<>