1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-874 StringStore Mk.3

StringStore as a vector of std::string had a performance regressions and
a rare crash.

This new version of StringStore restores the original StringStore with
the 64KB limitation and adds another vector to store strings that won't
fit into the small string storage.
This commit is contained in:
Andrew Hutchings
2017-08-14 21:47:04 +01:00
parent 553c3d5f27
commit 3f040173d2
2 changed files with 169 additions and 42 deletions

View File

@ -69,10 +69,10 @@ StringStore::~StringStore()
uint32_t i;
uint64_t inUse = 0, allocated = 0;
for (i = 0; i < mem.size(); i++) {
std::string *tmp = mem.back().get();
inUse += tmp->length();
allocated += tmp->length();
for (i = 0; i < mem.size(); i++) {
MemChunk *tmp = (MemChunk *) mem.back().get();
inUse += tmp->currentSize;
allocated += tmp->capacity;
}
if (allocated > 0)
cout << "~SS: " << inUse << "/" << allocated << " = " << (float) inUse/(float) allocated << endl;
@ -81,6 +81,7 @@ StringStore::~StringStore()
uint32_t StringStore::storeString(const uint8_t *data, uint32_t len)
{
MemChunk *lastMC = NULL;
uint32_t ret = 0;
empty = false; // At least a NULL is being stored.
@ -98,10 +99,47 @@ uint32_t StringStore::storeString(const uint8_t *data, uint32_t len)
if (fUseStoreStringMutex)
lk.lock();
shared_ptr<std::string> newString(new std::string((char*)data, len));
mem.push_back(newString);
if (mem.size() > 0)
lastMC = (MemChunk *) mem.back().get();
ret = mem.size();
if (len >= CHUNK_SIZE)
{
shared_array<uint8_t> newOne(new uint8_t[len + sizeof(MemChunk)]);
longStrings.push_back(newOne);
lastMC = (MemChunk*) longStrings.back().get();
lastMC->capacity = lastMC->currentSize = len;
memcpy(lastMC->data, data, len);
// High bit to mark a long string
ret = 0x80000000;
ret += longStrings.size() - 1;
}
else
{
if ((lastMC == NULL) || (lastMC->capacity - lastMC->currentSize < len))
{
// mem usage debugging
//if (lastMC)
//cout << "Memchunk efficiency = " << lastMC->currentSize << "/" << lastMC->capacity << endl;
shared_array<uint8_t> newOne(new uint8_t[CHUNK_SIZE + sizeof(MemChunk)]);
mem.push_back(newOne);
lastMC = (MemChunk *) mem.back().get();
lastMC->currentSize = 0;
lastMC->capacity = CHUNK_SIZE;
memset(lastMC->data, 0, CHUNK_SIZE);
}
ret = ((mem.size()-1) * CHUNK_SIZE) + lastMC->currentSize;
memcpy(&(lastMC->data[lastMC->currentSize]), data, len);
/*
cout << "stored: '" << hex;
for (uint32_t i = 0; i < len ; i++) {
cout << (char) lastMC->data[lastMC->currentSize + i];
}
cout << "' at position " << lastMC->currentSize << " len " << len << dec << endl;
*/
lastMC->currentSize += len;
}
return ret;
}
@ -109,15 +147,22 @@ uint32_t StringStore::storeString(const uint8_t *data, uint32_t len)
void StringStore::serialize(ByteStream &bs) const
{
uint32_t i;
MemChunk *mc;
bs << (uint32_t) mem.size();
bs << (uint8_t) empty;
for (i = 0; i < mem.size(); i++) {
if (mem[i].get() == NULL)
bs << empty_str;
else
bs << *mem[i].get();
mc = (MemChunk *) mem[i].get();
bs << (uint32_t) mc->currentSize;
//cout << "serialized " << mc->currentSize << " bytes\n";
bs.append(mc->data, mc->currentSize);
}
bs << (uint32_t) longStrings.size();
for (i = 0; i < longStrings.size(); i++)
{
mc = (MemChunk *) longStrings[i].get();
bs << (uint32_t) mc->currentSize;
bs.append(mc->data, mc->currentSize);
}
}
@ -125,30 +170,48 @@ void StringStore::deserialize(ByteStream &bs)
{
uint32_t i;
uint32_t count;
std::string buf;
uint32_t size;
uint8_t *buf;
MemChunk *mc;
uint8_t tmp8;
//mem.clear();
bs >> count;
mem.reserve(count);
mem.resize(count);
bs >> tmp8;
empty = (bool) tmp8;
for (i = 0; i < count; i++) {
bs >> size;
//cout << "deserializing " << size << " bytes\n";
bs >> buf;
// We do this to avoid pre-C++11 zero copy hell but need to
// preserve all data including NULs so using c_str() is out.
shared_ptr<std::string> newString(new std::string());
newString->append(buf);
mem.push_back(newString);
buf = bs.buf();
mem[i].reset(new uint8_t[size + sizeof(MemChunk)]);
mc = (MemChunk *) mem[i].get();
mc->currentSize = size;
mc->capacity = size;
memcpy(mc->data, buf, size);
bs.advance(size);
}
bs >> count;
longStrings.resize(count);
for (i = 0; i < count; i++)
{
bs >> size;
buf = bs.buf();
longStrings[i].reset(new uint8_t[size + sizeof(MemChunk)]);
mc = (MemChunk *) longStrings[i].get();
mc->capacity = mc->currentSize = size;
memcpy(mc->data, buf, size);
bs.advance(size);
}
return;
}
void StringStore::clear()
{
vector<shared_ptr<std::string> > emptyv;
vector<shared_array<uint8_t> > emptyv;
vector<shared_array<uint8_t> > emptyv2;
mem.swap(emptyv);
longStrings.swap(emptyv2);
empty = true;
}