You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-01 06:46:55 +03:00
Merge branch 'develop-1.1' into 1.1-merge-up-20180509-a2
This commit is contained in:
@ -82,10 +82,10 @@ StringStore::~StringStore()
|
||||
#endif
|
||||
}
|
||||
|
||||
uint32_t StringStore::storeString(const uint8_t* data, uint32_t len)
|
||||
uint64_t StringStore::storeString(const uint8_t* data, uint32_t len)
|
||||
{
|
||||
MemChunk* lastMC = NULL;
|
||||
uint32_t ret = 0;
|
||||
uint64_t ret = 0;
|
||||
|
||||
empty = false; // At least a NULL is being stored.
|
||||
|
||||
@ -95,7 +95,7 @@ uint32_t StringStore::storeString(const uint8_t* data, uint32_t len)
|
||||
|
||||
if ((len == 8 || len == 9) &&
|
||||
*((uint64_t*) data) == *((uint64_t*) joblist::CPNULLSTRMARK.c_str()))
|
||||
return numeric_limits<uint32_t>::max();
|
||||
return numeric_limits<uint64_t>::max();
|
||||
|
||||
//@bug6065, make StringStore::storeString() thread safe
|
||||
boost::mutex::scoped_lock lk(fMutex, defer_lock);
|
||||
@ -106,20 +106,21 @@ uint32_t StringStore::storeString(const uint8_t* data, uint32_t len)
|
||||
if (mem.size() > 0)
|
||||
lastMC = (MemChunk*) mem.back().get();
|
||||
|
||||
if (len >= CHUNK_SIZE)
|
||||
if ((len + 4) >= CHUNK_SIZE)
|
||||
{
|
||||
shared_array<uint8_t> newOne(new uint8_t[len + sizeof(MemChunk)]);
|
||||
shared_array<uint8_t> newOne(new uint8_t[len + sizeof(MemChunk) + 4]);
|
||||
longStrings.push_back(newOne);
|
||||
lastMC = (MemChunk*) longStrings.back().get();
|
||||
lastMC->capacity = lastMC->currentSize = len;
|
||||
memcpy(lastMC->data, data, len);
|
||||
lastMC->capacity = lastMC->currentSize = len + 4;
|
||||
memcpy(lastMC->data, &len, 4);
|
||||
memcpy(lastMC->data + 4, data, len);
|
||||
// High bit to mark a long string
|
||||
ret = 0x80000000;
|
||||
ret = 0x8000000000000000;
|
||||
ret += longStrings.size() - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((lastMC == NULL) || (lastMC->capacity - lastMC->currentSize < len))
|
||||
if ((lastMC == NULL) || (lastMC->capacity - lastMC->currentSize < (len + 4)))
|
||||
{
|
||||
// mem usage debugging
|
||||
//if (lastMC)
|
||||
@ -134,7 +135,13 @@ uint32_t StringStore::storeString(const uint8_t* data, uint32_t len)
|
||||
|
||||
|
||||
ret = ((mem.size() - 1) * CHUNK_SIZE) + lastMC->currentSize;
|
||||
memcpy(&(lastMC->data[lastMC->currentSize]), data, len);
|
||||
|
||||
// If this ever happens then we have big problems
|
||||
if (ret & 0x8000000000000000)
|
||||
throw logic_error("StringStore memory exceeded.");
|
||||
|
||||
memcpy(&(lastMC->data[lastMC->currentSize]), &len, 4);
|
||||
memcpy(&(lastMC->data[lastMC->currentSize]) + 4, data, len);
|
||||
/*
|
||||
cout << "stored: '" << hex;
|
||||
for (uint32_t i = 0; i < len ; i++) {
|
||||
@ -142,7 +149,7 @@ uint32_t StringStore::storeString(const uint8_t* data, uint32_t len)
|
||||
}
|
||||
cout << "' at position " << lastMC->currentSize << " len " << len << dec << endl;
|
||||
*/
|
||||
lastMC->currentSize += len;
|
||||
lastMC->currentSize += len + 4;
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -150,35 +157,35 @@ uint32_t StringStore::storeString(const uint8_t* data, uint32_t len)
|
||||
|
||||
// Writes the contents of this StringStore to `bs`.
//
// Layout, in order:
//   uint64  number of chunks in `mem`
//   uint8   the `empty` flag
//   per chunk in `mem`:          uint64 currentSize, then currentSize raw bytes
//   uint64  number of entries in `longStrings`
//   per entry in `longStrings`:  uint64 currentSize, then currentSize raw bytes
//
// Every count and size is streamed exactly once, as a 64-bit value, so the
// stream lines up with the uint64_t `count` / `size` locals that
// deserialize() declares. Streaming the older 32-bit width as well would put
// two fields where the reader expects one.
void StringStore::serialize(ByteStream& bs) const
{
    uint64_t i;
    MemChunk* mc;

    bs << (uint64_t) mem.size();
    bs << (uint8_t) empty;

    for (i = 0; i < mem.size(); i++)
    {
        mc = (MemChunk*) mem[i].get();
        bs << (uint64_t) mc->currentSize;
        //cout << "serialized " << mc->currentSize << " bytes\n";
        bs.append(mc->data, mc->currentSize);
    }

    bs << (uint64_t) longStrings.size();

    for (i = 0; i < longStrings.size(); i++)
    {
        mc = (MemChunk*) longStrings[i].get();
        bs << (uint64_t) mc->currentSize;
        bs.append(mc->data, mc->currentSize);
    }
}
|
||||
|
||||
void StringStore::deserialize(ByteStream& bs)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t count;
|
||||
uint32_t size;
|
||||
uint64_t i;
|
||||
uint64_t count;
|
||||
uint64_t size;
|
||||
uint8_t* buf;
|
||||
MemChunk* mc;
|
||||
uint8_t tmp8;
|
||||
@ -856,10 +863,9 @@ bool Row::isNullValue(uint32_t colIndex) const
|
||||
|
||||
if (inStringTable(colIndex))
|
||||
{
|
||||
uint32_t offset, length;
|
||||
offset = *((uint32_t*) &data[offsets[colIndex]]);
|
||||
length = *((uint32_t*) &data[offsets[colIndex] + 4]);
|
||||
return strings->isNullValue(offset, length);
|
||||
uint64_t offset;
|
||||
offset = *((uint64_t*) &data[offsets[colIndex]]);
|
||||
return strings->isNullValue(offset);
|
||||
}
|
||||
|
||||
if (data[offsets[colIndex]] == 0) // empty string
|
||||
@ -922,10 +928,9 @@ bool Row::isNullValue(uint32_t colIndex) const
|
||||
|
||||
if (inStringTable(colIndex))
|
||||
{
|
||||
uint32_t offset, length;
|
||||
offset = *((uint32_t*) &data[pos]);
|
||||
length = *((uint32_t*) &data[pos + 4]);
|
||||
return strings->isNullValue(offset, length);
|
||||
uint64_t offset;
|
||||
offset = *((uint64_t*) &data[pos]);
|
||||
return strings->isNullValue(offset);
|
||||
}
|
||||
|
||||
if (*((uint16_t*) &data[pos]) == 0)
|
||||
@ -1720,8 +1725,8 @@ RGData RowGroup::duplicate()
|
||||
|
||||
void Row::setStringField(const std::string& val, uint32_t colIndex)
|
||||
{
|
||||
uint32_t length;
|
||||
uint32_t offset;
|
||||
uint64_t offset;
|
||||
uint64_t length;
|
||||
|
||||
//length = strlen(val.c_str()) + 1;
|
||||
length = val.length();
|
||||
@ -1732,8 +1737,7 @@ void Row::setStringField(const std::string& val, uint32_t colIndex)
|
||||
if (inStringTable(colIndex))
|
||||
{
|
||||
offset = strings->storeString((const uint8_t*) val.data(), length);
|
||||
*((uint32_t*) &data[offsets[colIndex]]) = offset;
|
||||
*((uint32_t*) &data[offsets[colIndex] + 4]) = length;
|
||||
*((uint64_t*) &data[offsets[colIndex]]) = offset;
|
||||
// cout << " -- stored offset " << *((uint32_t *) &data[offsets[colIndex]])
|
||||
// << " length " << *((uint32_t *) &data[offsets[colIndex] + 4])
|
||||
// << endl;
|
||||
|
@ -92,13 +92,14 @@ public:
|
||||
StringStore();
|
||||
virtual ~StringStore();
|
||||
|
||||
inline std::string getString(uint32_t offset, uint32_t length) const;
|
||||
uint32_t storeString(const uint8_t* data, uint32_t length); //returns the offset
|
||||
inline const uint8_t* getPointer(uint32_t offset) const;
|
||||
inline std::string getString(uint64_t offset) const;
|
||||
uint64_t storeString(const uint8_t* data, uint32_t length); //returns the offset
|
||||
inline const uint8_t* getPointer(uint64_t offset) const;
|
||||
inline uint32_t getStringLength(uint64_t offset);
|
||||
inline bool isEmpty() const;
|
||||
inline uint64_t getSize() const;
|
||||
inline bool isNullValue(uint32_t offset, uint32_t length) const;
|
||||
inline bool equals(const std::string& str, uint32_t offset, uint32_t length) const;
|
||||
inline bool isNullValue(uint64_t offset) const;
|
||||
inline bool equals(const std::string& str, uint64_t offset) const;
|
||||
|
||||
void clear();
|
||||
|
||||
@ -615,9 +616,8 @@ inline bool Row::equals(const std::string& val, uint32_t colIndex) const
|
||||
{
|
||||
if (inStringTable(colIndex))
|
||||
{
|
||||
uint32_t offset = *((uint32_t*) &data[offsets[colIndex]]);
|
||||
uint32_t length = *((uint32_t*) &data[offsets[colIndex] + 4]);
|
||||
return strings->equals(val, offset, length);
|
||||
uint64_t offset = *((uint64_t*) &data[offsets[colIndex]]);
|
||||
return strings->equals(val, offset);
|
||||
}
|
||||
else
|
||||
return (strncmp(val.c_str(), (char*) &data[offsets[colIndex]], getColumnWidth(colIndex)) == 0);
|
||||
@ -719,7 +719,7 @@ inline int64_t Row::getIntField(uint32_t colIndex) const
|
||||
inline const uint8_t* Row::getStringPointer(uint32_t colIndex) const
|
||||
{
|
||||
if (inStringTable(colIndex))
|
||||
return strings->getPointer(*((uint32_t*) &data[offsets[colIndex]]));
|
||||
return strings->getPointer(*((uint64_t*) &data[offsets[colIndex]]));
|
||||
|
||||
return &data[offsets[colIndex]];
|
||||
}
|
||||
@ -727,14 +727,14 @@ inline const uint8_t* Row::getStringPointer(uint32_t colIndex) const
|
||||
inline uint32_t Row::getStringLength(uint32_t colIndex) const
|
||||
{
|
||||
if (inStringTable(colIndex))
|
||||
return *((uint32_t*) &data[offsets[colIndex] + 4]);
|
||||
return strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));
|
||||
|
||||
return strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
|
||||
}
|
||||
|
||||
inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_t colIndex)
|
||||
{
|
||||
uint32_t offset;
|
||||
uint64_t offset;
|
||||
|
||||
if (length > getColumnWidth(colIndex))
|
||||
length = getColumnWidth(colIndex);
|
||||
@ -742,8 +742,7 @@ inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_
|
||||
if (inStringTable(colIndex))
|
||||
{
|
||||
offset = strings->storeString(strdata, length);
|
||||
*((uint32_t*) &data[offsets[colIndex]]) = offset;
|
||||
*((uint32_t*) &data[offsets[colIndex] + 4]) = length;
|
||||
*((uint64_t*) &data[offsets[colIndex]]) = offset;
|
||||
// cout << " -- stored offset " << *((uint32_t *) &data[offsets[colIndex]])
|
||||
// << " length " << *((uint32_t *) &data[offsets[colIndex] + 4])
|
||||
// << endl;
|
||||
@ -759,8 +758,7 @@ inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_
|
||||
inline std::string Row::getStringField(uint32_t colIndex) const
|
||||
{
|
||||
if (inStringTable(colIndex))
|
||||
return strings->getString(*((uint32_t*) &data[offsets[colIndex]]),
|
||||
*((uint32_t*) &data[offsets[colIndex] + 4]));
|
||||
return strings->getString(*((uint64_t*) &data[offsets[colIndex]]));
|
||||
|
||||
// Not all CHAR/VARCHAR are NUL terminated so use length
|
||||
return std::string((char*) &data[offsets[colIndex]],
|
||||
@ -778,7 +776,7 @@ inline std::string Row::getVarBinaryStringField(uint32_t colIndex) const
|
||||
inline uint32_t Row::getVarBinaryLength(uint32_t colIndex) const
|
||||
{
|
||||
if (inStringTable(colIndex))
|
||||
return *((uint32_t*) &data[offsets[colIndex] + 4]);
|
||||
return strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));;
|
||||
|
||||
return *((uint16_t*) &data[offsets[colIndex]]);
|
||||
}
|
||||
@ -786,7 +784,7 @@ inline uint32_t Row::getVarBinaryLength(uint32_t colIndex) const
|
||||
inline const uint8_t* Row::getVarBinaryField(uint32_t colIndex) const
|
||||
{
|
||||
if (inStringTable(colIndex))
|
||||
return strings->getPointer(*((uint32_t*) &data[offsets[colIndex]]));
|
||||
return strings->getPointer(*((uint64_t*) &data[offsets[colIndex]]));
|
||||
|
||||
return &data[offsets[colIndex] + 2];
|
||||
}
|
||||
@ -795,7 +793,7 @@ inline const uint8_t* Row::getVarBinaryField(uint32_t& len, uint32_t colIndex) c
|
||||
{
|
||||
if (inStringTable(colIndex))
|
||||
{
|
||||
len = *((uint32_t*) &data[offsets[colIndex] + 4]);
|
||||
len = strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));
|
||||
return getVarBinaryField(colIndex);
|
||||
}
|
||||
else
|
||||
@ -1043,9 +1041,8 @@ inline void Row::setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t co
|
||||
|
||||
if (inStringTable(colIndex))
|
||||
{
|
||||
uint32_t offset = strings->storeString(val, len);
|
||||
*((uint32_t*) &data[offsets[colIndex]]) = offset;
|
||||
*((uint32_t*) &data[offsets[colIndex] + 4]) = len;
|
||||
uint64_t offset = strings->storeString(val, len);
|
||||
*((uint64_t*) &data[offsets[colIndex]]) = offset;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1762,26 +1759,29 @@ inline void copyRow(const Row& in, Row* out)
|
||||
copyRow(in, out, std::min(in.getColumnCount(), out->getColumnCount()));
|
||||
}
|
||||
|
||||
inline std::string StringStore::getString(uint32_t off, uint32_t len) const
|
||||
inline std::string StringStore::getString(uint64_t off) const
|
||||
{
|
||||
if (off == std::numeric_limits<uint32_t>::max())
|
||||
uint32_t length;
|
||||
|
||||
if (off == std::numeric_limits<uint64_t>::max())
|
||||
return joblist::CPNULLSTRMARK;
|
||||
|
||||
MemChunk* mc;
|
||||
|
||||
if (off & 0x80000000)
|
||||
if (off & 0x8000000000000000)
|
||||
{
|
||||
off = off - 0x80000000;
|
||||
off = off - 0x8000000000000000;
|
||||
|
||||
if (longStrings.size() <= off)
|
||||
return joblist::CPNULLSTRMARK;
|
||||
|
||||
mc = (MemChunk*) longStrings[off].get();
|
||||
return std::string((char*) mc->data, len);
|
||||
memcpy(&length, mc->data, 4);
|
||||
return std::string((char*) mc->data + 4, length);
|
||||
}
|
||||
|
||||
uint32_t chunk = off / CHUNK_SIZE;
|
||||
uint32_t offset = off % CHUNK_SIZE;
|
||||
uint64_t chunk = off / CHUNK_SIZE;
|
||||
uint64_t offset = off % CHUNK_SIZE;
|
||||
|
||||
// this has to handle uninitialized data as well. If it's uninitialized it doesn't matter
|
||||
// what gets returned, it just can't go out of bounds.
|
||||
@ -1790,30 +1790,32 @@ inline std::string StringStore::getString(uint32_t off, uint32_t len) const
|
||||
|
||||
mc = (MemChunk*) mem[chunk].get();
|
||||
|
||||
if ((offset + len) > mc->currentSize)
|
||||
memcpy(&length, &mc->data[offset], 4);
|
||||
|
||||
if ((offset + length) > mc->currentSize)
|
||||
return joblist::CPNULLSTRMARK;
|
||||
|
||||
return std::string((char*) & (mc->data[offset]), len);
|
||||
return std::string((char*) & (mc->data[offset]) + 4, length);
|
||||
}
|
||||
|
||||
inline const uint8_t* StringStore::getPointer(uint32_t off) const
|
||||
inline const uint8_t* StringStore::getPointer(uint64_t off) const
|
||||
{
|
||||
if (off == std::numeric_limits<uint32_t>::max())
|
||||
if (off == std::numeric_limits<uint64_t>::max())
|
||||
return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
|
||||
|
||||
uint32_t chunk = off / CHUNK_SIZE;
|
||||
uint32_t offset = off % CHUNK_SIZE;
|
||||
uint64_t chunk = off / CHUNK_SIZE;
|
||||
uint64_t offset = off % CHUNK_SIZE;
|
||||
MemChunk* mc;
|
||||
|
||||
if (off & 0x80000000)
|
||||
if (off & 0x8000000000000000)
|
||||
{
|
||||
off = off - 0x80000000;
|
||||
off = off - 0x8000000000000000;
|
||||
|
||||
if (longStrings.size() <= off)
|
||||
return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
|
||||
|
||||
mc = (MemChunk*) longStrings[off].get();
|
||||
return mc->data;
|
||||
return mc->data + 4;
|
||||
}
|
||||
|
||||
// this has to handle uninitialized data as well. If it's uninitialized it doesn't matter
|
||||
@ -1826,19 +1828,18 @@ inline const uint8_t* StringStore::getPointer(uint32_t off) const
|
||||
if (offset > mc->currentSize)
|
||||
return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
|
||||
|
||||
return &(mc->data[offset]);
|
||||
return &(mc->data[offset]) + 4;
|
||||
}
|
||||
|
||||
inline bool StringStore::isNullValue(uint32_t off, uint32_t len) const
|
||||
inline bool StringStore::isNullValue(uint64_t off) const
|
||||
{
|
||||
if (off == std::numeric_limits<uint32_t>::max() || len == 0)
|
||||
uint32_t length;
|
||||
|
||||
if (off == std::numeric_limits<uint64_t>::max())
|
||||
return true;
|
||||
|
||||
if (len < 8)
|
||||
return false;
|
||||
|
||||
// Long strings won't be NULL
|
||||
if (off & 0x80000000)
|
||||
if (off & 0x8000000000000000)
|
||||
return false;
|
||||
|
||||
uint32_t chunk = off / CHUNK_SIZE;
|
||||
@ -1849,35 +1850,46 @@ inline bool StringStore::isNullValue(uint32_t off, uint32_t len) const
|
||||
return true;
|
||||
|
||||
mc = (MemChunk*) mem[chunk].get();
|
||||
memcpy(&length, &mc->data[offset], 4);
|
||||
|
||||
if ((offset + len) > mc->currentSize)
|
||||
if (length == 0)
|
||||
return true;
|
||||
|
||||
if (mc->data[offset] == 0) // "" = NULL string for some reason...
|
||||
if (length < 8)
|
||||
return false;
|
||||
|
||||
if ((offset + length) > mc->currentSize)
|
||||
return true;
|
||||
|
||||
return (*((uint64_t*) &mc->data[offset]) == *((uint64_t*) joblist::CPNULLSTRMARK.c_str()));
|
||||
if (mc->data[offset + 4] == 0) // "" = NULL string for some reason...
|
||||
return true;
|
||||
|
||||
return (*((uint64_t*) &mc->data[offset] + 4) == *((uint64_t*) joblist::CPNULLSTRMARK.c_str()));
|
||||
}
|
||||
|
||||
inline bool StringStore::equals(const std::string& str, uint32_t off, uint32_t len) const
|
||||
inline bool StringStore::equals(const std::string& str, uint64_t off) const
|
||||
{
|
||||
if (off == std::numeric_limits<uint32_t>::max() || len == 0)
|
||||
uint32_t length;
|
||||
|
||||
if (off == std::numeric_limits<uint64_t>::max())
|
||||
return str == joblist::CPNULLSTRMARK;
|
||||
|
||||
MemChunk* mc;
|
||||
|
||||
if (off & 0x80000000)
|
||||
if (off & 0x8000000000000000)
|
||||
{
|
||||
if (longStrings.size() <= (off - 0x80000000))
|
||||
if (longStrings.size() <= (off - 0x8000000000000000))
|
||||
return false;
|
||||
|
||||
mc = (MemChunk*) longStrings[off - 0x80000000].get();
|
||||
mc = (MemChunk*) longStrings[off - 0x8000000000000000].get();
|
||||
|
||||
memcpy(&length, mc->data, 4);
|
||||
|
||||
// Not sure if this check it needed, but adds safety
|
||||
if (len > mc->currentSize)
|
||||
if (length > mc->currentSize)
|
||||
return false;
|
||||
|
||||
return (strncmp(str.c_str(), (const char*) mc->data, len) == 0);
|
||||
return (strncmp(str.c_str(), (const char*) mc->data + 4, length) == 0);
|
||||
}
|
||||
|
||||
uint32_t chunk = off / CHUNK_SIZE;
|
||||
@ -1887,11 +1899,44 @@ inline bool StringStore::equals(const std::string& str, uint32_t off, uint32_t l
|
||||
return false;
|
||||
|
||||
mc = (MemChunk*) mem[chunk].get();
|
||||
memcpy(&length, &mc->data[offset], 4);
|
||||
|
||||
if ((offset + len) > mc->currentSize)
|
||||
if ((offset + length) > mc->currentSize)
|
||||
return false;
|
||||
|
||||
return (strncmp(str.c_str(), (const char*) &mc->data[offset], len) == 0);
|
||||
return (strncmp(str.c_str(), (const char*) &mc->data[offset] + 4, length) == 0);
|
||||
}
|
||||
inline uint32_t StringStore::getStringLength(uint64_t off)
|
||||
{
|
||||
uint32_t length;
|
||||
MemChunk* mc;
|
||||
|
||||
if (off == std::numeric_limits<uint64_t>::max())
|
||||
return 0;
|
||||
|
||||
if (off & 0x8000000000000000)
|
||||
{
|
||||
off = off - 0x8000000000000000;
|
||||
|
||||
if (longStrings.size() <= off)
|
||||
return 0;
|
||||
|
||||
mc = (MemChunk*) longStrings[off].get();
|
||||
memcpy(&length, mc->data, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
uint64_t chunk = off / CHUNK_SIZE;
|
||||
uint64_t offset = off % CHUNK_SIZE;
|
||||
|
||||
if (mem.size() <= chunk)
|
||||
return 0;
|
||||
|
||||
mc = (MemChunk*) mem[chunk].get();
|
||||
memcpy(&length, &mc->data[offset], 4);
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
inline bool StringStore::isEmpty() const
|
||||
|
Reference in New Issue
Block a user