1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

[MCOL-4709] Disk-based aggregation

* Introduce multigeneration aggregation

* Do not save unused part of RGDatas to disk
* Add IO error explanation (strerror)

* Reduce memory usage while aggregating
* introduce in-memory generations to improve memory utilization

* Try to keep the number of buckets low

* Refactor disk aggregation a bit
* pass calculated hash into RowAggregation
* try to keep some RGData with free space in memory

* do not dump more than half of rowgroups to disk if generations are
  allowed, instead start a new generation
* for each thread shift the first processed bucket at each iteration,
  so the generations start more evenly

* Unify temp data location

* Explicitly create temp subdirectories
  whether disk aggregation/join are enabled or not
This commit is contained in:
Alexey Antipovsky
2021-01-15 18:52:13 +03:00
parent 3537c0d635
commit 475104e4d3
24 changed files with 5932 additions and 906 deletions

View File

@ -0,0 +1,14 @@
# Build configuration for the `rgprint` command-line tool (single source
# file: rgprint.cpp).

# Make the engine-wide include directories visible to this target.
include_directories( ${ENGINE_COMMON_INCLUDES} )
########### next target ###############
# Source list for the executable.
set(rgprint_SRCS rgprint.cpp)
add_executable(rgprint ${rgprint_SRCS})
# Link against the engine linker flags plus the SNMP, MariaDB client and
# engine write libraries supplied by the enclosing build.
target_link_libraries(rgprint ${ENGINE_LDFLAGS} ${NETSNMP_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS})
# Install the binary into the engine bin directory as part of the
# columnstore-engine component.
install(TARGETS rgprint DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)

94
tools/rgprint/rgprint.cpp Normal file
View File

@ -0,0 +1,94 @@
/* Copyright (C) 2021 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <cerrno>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include <utils/rowgroup/rowgroup.h>
int main(int argc, char* argv[])
{
if (argc < 2)
{
std::cerr << "Usage: " << argv[0] << " <dump file>" << std::endl;
return 0;
}
rowgroup::RowGroup rg;
char* p = strrchr(argv[1], '/');
int rfd = -1;
if (p == nullptr)
p = argv[1];
unsigned pid;
void* agg;
auto c = sscanf(p, "Agg-p%u-t%p-", &pid, &agg);
if (c == 2) {
char fname[1024];
snprintf(fname, sizeof(fname), "META-p%u-t%p", pid, agg);
rfd = open(fname, O_RDONLY);
}
if (rfd < 0)
rfd = open("./META", O_RDONLY);
if (rfd >= 0) {
struct stat rst;
fstat(rfd, &rst);
messageqcpp::ByteStream rbs;
rbs.needAtLeast(rst.st_size);
rbs.restart();
auto r = read(rfd, rbs.getInputPtr(), rst.st_size);
if (r != rst.st_size)
abort();
rbs.advanceInputPtr(r);
rg.deserialize(rbs);
close(rfd);
} else {
std::vector<uint32_t> pos{2, 6, 22, 30, 46, 54}; // ?
std::vector<uint32_t> oids{3011, 3011, 3011, 3011, 3011}; // ?
std::vector<uint32_t> keys{1, 1, 1, 1, 1}; // ?
std::vector<execplan::CalpontSystemCatalog::ColDataType> col_t{
execplan::CalpontSystemCatalog::INT,
execplan::CalpontSystemCatalog::LONGDOUBLE,
execplan::CalpontSystemCatalog::UBIGINT,
execplan::CalpontSystemCatalog::LONGDOUBLE,
execplan::CalpontSystemCatalog::UBIGINT
};
std::vector<uint32_t> csN{8, 8, 8, 8, 8};
std::vector<uint32_t> scale{0, 0, 0, 0, 0};
std::vector<uint32_t> prec{10, 4294967295, 9999, 4294967295, 19};
rg = rowgroup::RowGroup(5, pos, oids, keys, col_t, csN, scale, prec, 20, false, std::vector<bool>{});
}
int fd = open(argv[1], O_RDONLY);
struct stat st;
fstat(fd, &st);
messageqcpp::ByteStream bs;
bs.needAtLeast(st.st_size);
bs.restart();
auto r = read(fd, bs.getInputPtr(), st.st_size);
if (r != st.st_size)
abort();
bs.advanceInputPtr(r);
rowgroup::RGData rst;
rst.deserialize(bs);
rg.setData(&rst);
close(fd);
std::cout << "RowGroup data:\n" << rg.toString() << std::endl;
return 0;
}