You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
MCOL-4580 extent elimination for dictionary-based text/varchar types
The idea is relatively simple: encode prefixes of collated strings as integers and use them to compute extents' ranges, so that extents can then be eliminated for string columns. The patch contains all of the necessary code but misses one important step: we keep the charset index rather than the collation index. Because of this, some of the tests in the bugfix suite fail, and thus the main functionality is turned off. The reason this patch is being submitted as a PR at all is that it contains the changes that make CHAR/VARCHAR columns unsigned. This change is needed for the vectorization work.
This commit is contained in:
@ -24,6 +24,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <boost/thread.hpp>
|
||||
#include <boost/thread/tss.hpp>
|
||||
@ -49,17 +50,19 @@ namespace WriteEngine
|
||||
// forward reference
|
||||
class DbFileOp;
|
||||
|
||||
/** @brief Extended CPInfo - with type handler for all type-related information */
|
||||
/** @brief Extended CPInfo - with all type-related information and associated range data */
|
||||
struct ExtCPInfo
|
||||
{
|
||||
execplan::CalpontSystemCatalog::ColDataType fColType;
|
||||
int fColWidth;
|
||||
BRM::CPInfo fCPInfo;
|
||||
std::shared_ptr<std::vector<int64_t>> fStringsPrefixes;
|
||||
/** @brief Construct range info for a column of the given data type and width.
 *
 *  @param colType  column data type, stored in fColType
 *  @param colWidth column width, stored in fColWidth
 *
 *  Also sets fCPInfo.isBinaryColumn when the width, reinterpreted as
 *  unsigned, is greater than datatypes::MAXLEGACYWIDTH.
 */
ExtCPInfo(execplan::CalpontSystemCatalog::ColDataType colType, int colWidth)
 : fColType(colType), fColWidth(colWidth)
{
  // Compare as unsigned, exactly as the width limit is expressed.
  const auto unsignedWidth = static_cast<unsigned int>(colWidth);
  fCPInfo.isBinaryColumn = unsignedWidth > datatypes::MAXLEGACYWIDTH;
}
|
||||
|
||||
void toInvalid()
|
||||
{
|
||||
auto mm = datatypes::MinMaxInfo::invalidRange(fColType);
|
||||
@ -68,7 +71,22 @@ struct ExtCPInfo
|
||||
fCPInfo.bigMax = mm.int128Max;
|
||||
fCPInfo.bigMin = mm.int128Min;
|
||||
}
|
||||
|
||||
void addStringPrefix(int64_t strPrefix)
|
||||
{
|
||||
if (!fStringsPrefixes)
|
||||
{
|
||||
fStringsPrefixes.reset(new std::vector<int64_t>());
|
||||
}
|
||||
fStringsPrefixes->push_back(strPrefix);
|
||||
}
|
||||
bool hasStringsPrefixes() const
|
||||
{
|
||||
return fStringsPrefixes.get() != nullptr;
|
||||
}
|
||||
int64_t* stringsPrefixes() const
|
||||
{
|
||||
return hasStringsPrefixes() ? fStringsPrefixes->data() : nullptr;
|
||||
}
|
||||
bool isInvalid()
|
||||
{
|
||||
datatypes::MinMaxInfo mm;
|
||||
|
@ -344,6 +344,7 @@ struct DctnryStruct /** @brief Dctnry Interface Struct*/
|
||||
uint16_t fColSegment; /** @brief Segment for column file */
|
||||
uint16_t fColDbRoot; /** @brief DBRoot for column file */
|
||||
int fCompressionType; /** @brief Compression type for column file */
|
||||
int fCharsetNumber; /** @brief Charset number to account for collation when computing string prefixes */
|
||||
DctnryStruct()
|
||||
: dctnryOid(0)
|
||||
, columnOid(0)
|
||||
@ -353,6 +354,7 @@ struct DctnryStruct /** @brief Dctnry Interface Struct*/
|
||||
, fColSegment(0)
|
||||
, fColDbRoot(0)
|
||||
, fCompressionType(idbdatafile::IDBPolicy::useHdfs() ? 2 : 0)
|
||||
, fCharsetNumber(8)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
Reference in New Issue
Block a user