You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-874 StringStore Mk.3
StringStore as a vector of std::string had a performance regressions and a rare crash. This new version of StringStore restores the original StringStore with the 64KB limitation and adds another vector to store strings that won't fit into the small string storage.
This commit is contained in:
@ -69,10 +69,10 @@ StringStore::~StringStore()
|
||||
uint32_t i;
|
||||
uint64_t inUse = 0, allocated = 0;
|
||||
|
||||
for (i = 0; i < mem.size(); i++) {
|
||||
std::string *tmp = mem.back().get();
|
||||
inUse += tmp->length();
|
||||
allocated += tmp->length();
|
||||
for (i = 0; i < mem.size(); i++) {
|
||||
MemChunk *tmp = (MemChunk *) mem.back().get();
|
||||
inUse += tmp->currentSize;
|
||||
allocated += tmp->capacity;
|
||||
}
|
||||
if (allocated > 0)
|
||||
cout << "~SS: " << inUse << "/" << allocated << " = " << (float) inUse/(float) allocated << endl;
|
||||
@ -81,6 +81,7 @@ StringStore::~StringStore()
|
||||
|
||||
uint32_t StringStore::storeString(const uint8_t *data, uint32_t len)
|
||||
{
|
||||
MemChunk *lastMC = NULL;
|
||||
uint32_t ret = 0;
|
||||
|
||||
empty = false; // At least a NULL is being stored.
|
||||
@ -98,10 +99,47 @@ uint32_t StringStore::storeString(const uint8_t *data, uint32_t len)
|
||||
if (fUseStoreStringMutex)
|
||||
lk.lock();
|
||||
|
||||
shared_ptr<std::string> newString(new std::string((char*)data, len));
|
||||
mem.push_back(newString);
|
||||
if (mem.size() > 0)
|
||||
lastMC = (MemChunk *) mem.back().get();
|
||||
|
||||
ret = mem.size();
|
||||
if (len >= CHUNK_SIZE)
|
||||
{
|
||||
shared_array<uint8_t> newOne(new uint8_t[len + sizeof(MemChunk)]);
|
||||
longStrings.push_back(newOne);
|
||||
lastMC = (MemChunk*) longStrings.back().get();
|
||||
lastMC->capacity = lastMC->currentSize = len;
|
||||
memcpy(lastMC->data, data, len);
|
||||
// High bit to mark a long string
|
||||
ret = 0x80000000;
|
||||
ret += longStrings.size() - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((lastMC == NULL) || (lastMC->capacity - lastMC->currentSize < len))
|
||||
{
|
||||
// mem usage debugging
|
||||
//if (lastMC)
|
||||
//cout << "Memchunk efficiency = " << lastMC->currentSize << "/" << lastMC->capacity << endl;
|
||||
shared_array<uint8_t> newOne(new uint8_t[CHUNK_SIZE + sizeof(MemChunk)]);
|
||||
mem.push_back(newOne);
|
||||
lastMC = (MemChunk *) mem.back().get();
|
||||
lastMC->currentSize = 0;
|
||||
lastMC->capacity = CHUNK_SIZE;
|
||||
memset(lastMC->data, 0, CHUNK_SIZE);
|
||||
}
|
||||
|
||||
|
||||
ret = ((mem.size()-1) * CHUNK_SIZE) + lastMC->currentSize;
|
||||
memcpy(&(lastMC->data[lastMC->currentSize]), data, len);
|
||||
/*
|
||||
cout << "stored: '" << hex;
|
||||
for (uint32_t i = 0; i < len ; i++) {
|
||||
cout << (char) lastMC->data[lastMC->currentSize + i];
|
||||
}
|
||||
cout << "' at position " << lastMC->currentSize << " len " << len << dec << endl;
|
||||
*/
|
||||
lastMC->currentSize += len;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -109,15 +147,22 @@ uint32_t StringStore::storeString(const uint8_t *data, uint32_t len)
|
||||
void StringStore::serialize(ByteStream &bs) const
|
||||
{
|
||||
uint32_t i;
|
||||
MemChunk *mc;
|
||||
|
||||
bs << (uint32_t) mem.size();
|
||||
bs << (uint8_t) empty;
|
||||
for (i = 0; i < mem.size(); i++) {
|
||||
if (mem[i].get() == NULL)
|
||||
bs << empty_str;
|
||||
else
|
||||
bs << *mem[i].get();
|
||||
mc = (MemChunk *) mem[i].get();
|
||||
bs << (uint32_t) mc->currentSize;
|
||||
//cout << "serialized " << mc->currentSize << " bytes\n";
|
||||
bs.append(mc->data, mc->currentSize);
|
||||
}
|
||||
bs << (uint32_t) longStrings.size();
|
||||
for (i = 0; i < longStrings.size(); i++)
|
||||
{
|
||||
mc = (MemChunk *) longStrings[i].get();
|
||||
bs << (uint32_t) mc->currentSize;
|
||||
bs.append(mc->data, mc->currentSize);
|
||||
}
|
||||
}
|
||||
|
||||
@ -125,30 +170,48 @@ void StringStore::deserialize(ByteStream &bs)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t count;
|
||||
std::string buf;
|
||||
uint32_t size;
|
||||
uint8_t *buf;
|
||||
MemChunk *mc;
|
||||
uint8_t tmp8;
|
||||
|
||||
//mem.clear();
|
||||
bs >> count;
|
||||
mem.reserve(count);
|
||||
mem.resize(count);
|
||||
bs >> tmp8;
|
||||
empty = (bool) tmp8;
|
||||
for (i = 0; i < count; i++) {
|
||||
bs >> size;
|
||||
//cout << "deserializing " << size << " bytes\n";
|
||||
bs >> buf;
|
||||
// We do this to avoid pre-C++11 zero copy hell but need to
|
||||
// preserve all data including NULs so using c_str() is out.
|
||||
shared_ptr<std::string> newString(new std::string());
|
||||
newString->append(buf);
|
||||
mem.push_back(newString);
|
||||
buf = bs.buf();
|
||||
mem[i].reset(new uint8_t[size + sizeof(MemChunk)]);
|
||||
mc = (MemChunk *) mem[i].get();
|
||||
mc->currentSize = size;
|
||||
mc->capacity = size;
|
||||
memcpy(mc->data, buf, size);
|
||||
bs.advance(size);
|
||||
}
|
||||
bs >> count;
|
||||
longStrings.resize(count);
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
bs >> size;
|
||||
buf = bs.buf();
|
||||
longStrings[i].reset(new uint8_t[size + sizeof(MemChunk)]);
|
||||
mc = (MemChunk *) longStrings[i].get();
|
||||
mc->capacity = mc->currentSize = size;
|
||||
memcpy(mc->data, buf, size);
|
||||
bs.advance(size);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void StringStore::clear()
|
||||
{
|
||||
vector<shared_ptr<std::string> > emptyv;
|
||||
vector<shared_array<uint8_t> > emptyv;
|
||||
vector<shared_array<uint8_t> > emptyv2;
|
||||
mem.swap(emptyv);
|
||||
longStrings.swap(emptyv2);
|
||||
empty = true;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user