1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

fix(aggregation, disk-based) MCOL-5691 distinct aggregate disk based (#3145)

* fix(aggregation, disk-based): MCOL-5689 this fixes disk-based distinct aggregation functions
Previously, disk-based distinct aggregation functions produced incorrect results because no finalization was applied to previous generations stored on disk.

* fix(aggregation, disk-based): Fix disk-based COUNT(DISTINCT ...) queries. (Case 2). (Distinct & Multi-Distinct, Single- & Multi-Threaded).

* fix(aggregation, disk-based): Fix disk-based DISTINCT & GROUP BY queries. (Case 1). (Distinct & Multi-Distinct, Single- & Multi-Threaded).

---------

Co-authored-by: Theresa Hradilak <theresa.hradilak@gmail.com>
Co-authored-by: Roman Nozdrin <rnozdrin@mariadb.com>
This commit is contained in:
drrtuy
2024-03-24 17:04:37 +02:00
committed by Leonid Fedorov
parent 8cb7bc8e54
commit 444cf4c65e
7 changed files with 398 additions and 128 deletions

View File

@ -20,6 +20,7 @@
#include "resourcemanager.h"
#include "rowgroup.h"
#include "idbcompress.h"
#include <cstdint>
#include <random>
#include <sys/stat.h>
#include <unistd.h>
@ -35,10 +36,15 @@ class RowPosHashStorage;
// Owning (std::unique_ptr) handle to a RowPosHashStorage.
using RowPosHashStoragePtr = std::unique_ptr<RowPosHashStorage>;
// Forward declaration; the full definition is not needed at this point.
class RowGroupStorage;
// Owning (std::unique_ptr) handle to an RGData.
using RGDataUnPtr = std::unique_ptr<RGData>;
// Pair of two 64-bit unsigned values.
// NOTE(review): the name suggests (position, other/original position) -- confirm against usage.
using PosOpos = std::pair<uint64_t, uint64_t>;
// Pair of two 64-bit unsigned values.
// NOTE(review): the name suggests (from-generation id, to-generation id) -- confirm against usage.
using FgidTgid = std::pair<uint64_t, uint64_t>;
// Declaration only: returns a 64-bit value computed for row `r`.
// NOTE(review): `lastCol` presumably limits which columns take part in the hash -- confirm at the definition.
uint64_t hashRow(const rowgroup::Row& r, std::size_t lastCol);
// Size limit of 2048.
// NOTE(review): presumably the maximum length of a constant string -- confirm units and usage.
constexpr const size_t MaxConstStrSize = 2048ULL;
// Twice MaxConstStrSize (4096).
constexpr const size_t MaxConstStrBufSize = MaxConstStrSize << 1;
// Element count of 64.
// NOTE(review): presumably the number of entries covered by one hash mask -- confirm against usage.
constexpr const uint64_t HashMaskElements = 64ULL;
class RowAggStorage
{
@ -97,6 +103,12 @@ class RowAggStorage
*/
std::unique_ptr<RGData> getNextRGData();
/** @brief Remove the last RGData from in-memory storage or from disk.
*
* Iterates over all generations on disk if available.
*
* @param rgdata Output parameter through which the removed RGData is returned.
* @returns True if an RGData was returned via @p rgdata, or false if no more
*          RGData can be returned.
*/
bool getNextOutputRGData(std::unique_ptr<RGData>& rgdata);
/** @brief TODO
*
* @param mergeFunc