1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-06-13 16:01:32 +03:00

[MCOL-4709] Disk-based aggregation

* Introduce multigeneration aggregation

* Do not save unused part of RGDatas to disk
* Add IO error explanation (strerror)

* Reduce memory usage while aggregating
* introduce in-memory generations for better memory utilization

* Try to keep the number of buckets low

* Refactor disk aggregation a bit
* pass calculated hash into RowAggregation
* try to keep some RGData with free space in memory

* do not dump more than half of rowgroups to disk if generations are
  allowed, instead start a new generation
* for each thread shift the first processed bucket at each iteration,
  so the generations start more evenly

* Unify temp data location

* Explicitly create temp subdirectories
  regardless of whether disk aggregation/joins are enabled
This commit is contained in:
Alexey Antipovsky
2021-01-15 18:52:13 +03:00
parent 3537c0d635
commit 475104e4d3
24 changed files with 5932 additions and 906 deletions

View File

@ -59,6 +59,9 @@ namespace fs = boost::filesystem;
#include "installdir.h"
#ifdef _MSC_VER
#include "idbregistry.h"
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "bytestream.h"
@ -673,6 +676,24 @@ const vector<string> Config::enumSection(const string& section)
return fParser.enumSection(fDoc, section);
}
/** @brief Build the temp-file directory path for the given purpose.
 *
 * Reads the "SystemTempFileDir" entry of the "SystemConfig" section; when
 * that entry is empty, "/tmp/columnstore_tmp_files" is used instead. A "/"
 * separator and a purpose-specific subdirectory name are then appended.
 *
 * @param what selects the subdirectory: Joins -> "joins/",
 *             Aggregates -> "aggregates/".
 * @return the assembled path (ending in "/"), or an empty string when
 *         `what` matches none of the handled purposes.
 */
std::string Config::getTempFileDir(Config::TempDirPurpose what)
{
    std::string baseDir = getConfig("SystemConfig", "SystemTempFileDir");

    if (baseDir.empty())
        baseDir = "/tmp/columnstore_tmp_files";

    // The separator is appended unconditionally, whatever the configured value ends with.
    baseDir += "/";

    if (what == TempDirPurpose::Joins)
        return baseDir + "joins/";

    if (what == TempDirPurpose::Aggregates)
        return baseDir + "aggregates/";

    // Only reached for a value outside the purposes handled above.
    return std::string();
}
} //namespace config
// vim:ts=4 sw=4: