mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-06-01 22:41:43 +03:00
324 lines
12 KiB
C++
324 lines
12 KiB
C++
/* Copyright (C) 2014 InfiniDB, Inc.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
/******************************************************************************
|
|
* $Id: primitiveprocessor.h 2035 2013-01-21 14:12:19Z rdempsey $
|
|
*
|
|
*****************************************************************************/
|
|
|
|
/** @file */
|
|
|
|
#ifndef PRIMITIVEPROCESSOR_H_
|
|
#define PRIMITIVEPROCESSOR_H_
|
|
|
|
#include <stdexcept>
|
|
#include <vector>
|
|
#ifndef _MSC_VER
|
|
#include <tr1/unordered_set>
|
|
#else
|
|
#include <unordered_set>
|
|
#endif
|
|
|
|
#ifdef __linux__
|
|
#define POSIX_REGEX
|
|
#endif
|
|
|
|
#ifdef POSIX_REGEX
|
|
#include <regex.h>
|
|
#else
|
|
#include <boost/regex.hpp>
|
|
#endif
|
|
#include <cstddef>
|
|
#include <boost/shared_ptr.hpp>
|
|
#include <boost/shared_array.hpp>
|
|
|
|
#include "primitivemsg.h"
|
|
#include "calpontsystemcatalog.h"
|
|
#include "stats.h"
|
|
#include "primproc.h"
|
|
#include "hasher.h"
|
|
|
|
class PrimTest;
|
|
|
|
namespace primitives
|
|
{
|
|
|
|
enum ColumnFilterMode {
|
|
STANDARD,
|
|
TWO_ARRAYS,
|
|
UNORDERED_SET
|
|
};
|
|
|
|
class pcfHasher
|
|
{
|
|
public:
|
|
inline size_t operator()(const int64_t i) const
|
|
{
|
|
return i;
|
|
}
|
|
};
|
|
|
|
class pcfEqual
|
|
{
|
|
public:
|
|
inline size_t operator()(const int64_t f1, const int64_t f2) const
|
|
{
|
|
return f1 == f2;
|
|
}
|
|
};
|
|
|
|
typedef std::tr1::unordered_set<int64_t, pcfHasher, pcfEqual> prestored_set_t;
|
|
typedef std::tr1::unordered_set<std::string, utils::Hasher> DictEqualityFilter;
|
|
|
|
struct idb_regex_t
|
|
{
|
|
#ifdef POSIX_REGEX
|
|
regex_t regex;
|
|
#else
|
|
boost::regex regex;
|
|
#endif
|
|
bool used;
|
|
idb_regex_t() : used(false) { }
|
|
~idb_regex_t() {
|
|
#ifdef POSIX_REGEX
|
|
if (used)
|
|
regfree(®ex);
|
|
#endif
|
|
}
|
|
};
|
|
|
|
struct ParsedColumnFilter {
|
|
ColumnFilterMode columnFilterMode;
|
|
boost::shared_array<int64_t> prestored_argVals;
|
|
boost::shared_array<uint8_t> prestored_cops;
|
|
boost::shared_array<uint8_t> prestored_rfs;
|
|
boost::shared_ptr<prestored_set_t> prestored_set;
|
|
boost::shared_array<idb_regex_t> prestored_regex;
|
|
uint8_t likeOps;
|
|
|
|
ParsedColumnFilter();
|
|
~ParsedColumnFilter();
|
|
};
|
|
|
|
//@bug 1828 These need to be public so that column operations can use it for 'like'
|
|
struct p_DataValue {
|
|
int len;
|
|
const uint8_t *data;
|
|
};
|
|
|
|
boost::shared_ptr<ParsedColumnFilter> parseColumnFilter(const uint8_t *filterString,
|
|
uint32_t colWidth, uint32_t colType, uint32_t filterCount, uint32_t BOP);
|
|
|
|
/** @brief This class encapsulates the primitive processing functionality of the system.
|
|
*
|
|
* This class encapsulates the primitive processing functionality of the system.
|
|
*/
|
|
class PrimitiveProcessor
|
|
{
|
|
public:
|
|
PrimitiveProcessor(int debugLevel=0);
|
|
virtual ~PrimitiveProcessor();
|
|
|
|
/** @brief Sets the block to operate on
|
|
*
|
|
* The primitive processing functions operate on one block at a time. The caller
|
|
* sets which block to operate on next with this function.
|
|
*/
|
|
void setBlockPtr(int *data)
|
|
{
|
|
block = data;
|
|
}
|
|
void setPMStatsPtr(dbbc::Stats* p)
|
|
{
|
|
fStatsPtr=p;
|
|
}
|
|
|
|
|
|
/** @brief The interface to Mark's NIOS primitive processing code.
|
|
*
|
|
* The interface to Mark's NIOS primitive processing code. Instead of reading
|
|
* and writing to a bus, it will read/write to buffers specified by inBuf
|
|
* and outBuf. The primitives implemented this way are:
|
|
* - p_Col and p_ColAggregate
|
|
* - p_GetSignature
|
|
*
|
|
* @param inBuf (in) The buffer containing a command to execute
|
|
* @param inLength (in) The size of inBuf in 4-byte words
|
|
* @param outBuf (in) The buffer to store the output in
|
|
* @param outLength (in) The size of outBuf in 4-byte words
|
|
* @param written (out) The number of bytes written to outBuf.
|
|
* @note Throws logic_error if the output buffer is too small for the result.
|
|
*/
|
|
void processBuffer(int *inBuf, unsigned inLength, int *outBuf, unsigned outLength,
|
|
unsigned *written);
|
|
|
|
/* Patrick */
|
|
|
|
/** @brief The p_TokenByScan primitive processor
|
|
*
|
|
* The p_TokenByScan primitive processor. It relies on the caller setting
|
|
* the block to operate on with setBlockPtr(). It assumes the continuation
|
|
* pointer is not used.
|
|
* @param t (in) The arguments to the primitive
|
|
* @param out (out) This must point to memory of some currently unknown max size
|
|
* @param outSize (in) The size of the output buffer in bytes.
|
|
* @note Throws logic_error if the output buffer is too small for the result.
|
|
*/
|
|
void p_TokenByScan(const TokenByScanRequestHeader *t,
|
|
TokenByScanResultHeader *out, unsigned outSize,bool utf8,
|
|
boost::shared_ptr<DictEqualityFilter> eqFilter);
|
|
|
|
/** @brief The p_IdxWalk primitive processor
|
|
*
|
|
* The p_IdxWalk primitive processor. The caller must set the block to operate
|
|
* on with setBlockPtr(). This primitive can return intermediate results.
|
|
* All results returned will have an different LBID than the input. They can
|
|
* also be in varying states of completion. A result is final when
|
|
* Shift >= SSlen, otherwise it is intermediate and needs to be reissued with
|
|
* the specified LBID loaded.
|
|
* @note If in->NVALS > 2, new vectors may be returned in the result set, which
|
|
* will have to be deleted by the caller. The test to use right now is
|
|
* ({element}->NVALS > 2 && {element}->State == 0). If that condition is true,
|
|
* delete the vector, otherwise don't. This kludginess is for efficiency's sake
|
|
* and may go away for the sake of sanity later.
|
|
* @note It is safe to delete any vector passed in after the call.
|
|
* @param out The caller should pass in an empty vector. The results
|
|
* will be returned as elements of this vector.
|
|
*/
|
|
void p_IdxWalk(const IndexWalkHeader *in, std::vector<IndexWalkHeader *> *out) throw();
|
|
|
|
/** @brief The p_IdxList primitive processor.
|
|
*
|
|
* The p_IdxList primitive processor. The caller must set the block to operate
|
|
* on with setBlockPtr(). This primitive can return one intermediate result
|
|
* for every call made. If there is an intermediate result returned, it will
|
|
* be the first element, distinguished by its type field. If the
|
|
* first element has a type == RID (3) , there is no intermediate result. If
|
|
* the first element had a type == LLP_SUBBLK (4) or type == LLP_BLK (5),
|
|
* that element is the intermediate result. Its value field will be a pointer
|
|
* to the next section of the list.
|
|
*
|
|
* @param rqst (in) The request header followed by NVALS IndexWalkParams
|
|
* @param rslt (out) The caller passes in a buffer which will be filled
|
|
* by the primitive on return. It will consist of an IndexListHeader,
|
|
* followed by NVALS IndexListEntrys.
|
|
* @param mode (optional, in) 0 specifies old behavior (the last entry of a block might
|
|
* be a pointer). 1 specifies new behavior (the last entry should be ignored).
|
|
*/
|
|
void p_IdxList(const IndexListHeader *rqst, IndexListHeader *rslt, int mode = 1);
|
|
|
|
/** @brief The p_AggregateSignature primitive processor.
|
|
*
|
|
* The p_AggregateSignature primitive processor. It operates on a dictionary
|
|
* block and assumes the continuation pointer is not used.
|
|
* @param in The input parameters
|
|
* @param out A pointer to a buffer where the result will be written.
|
|
* @param outSize The size of the output buffer in bytes.
|
|
* @param written (out parameter) A pointer to 1 int, which will contain the
|
|
* number of bytes written to out.
|
|
*/
|
|
void p_AggregateSignature(const AggregateSignatureRequestHeader *in,
|
|
AggregateSignatureResultHeader *out, unsigned outSize, unsigned *written, bool utf8);
|
|
|
|
/** @brief The p_Col primitive processor.
|
|
*
|
|
* The p_Col primitive processor. It operates on a column block specified using setBlockPtr().
|
|
* @param in The buffer containing the command parameters.
|
|
* The buffer should begin with a NewColRequestHeader structure, followed by
|
|
* an array of 'NOPS' defining the filter to apply (optional),
|
|
* followed by an array of RIDs to apply the filter to (optional).
|
|
* @param out The buffer that will contain the results. On return, it will start with
|
|
* a NewColResultHeader, followed by the output type specified by in->OutputType.
|
|
* \li If OT_RID, it will be an array of RIDs
|
|
* \li If OT_DATAVALUE, it will be an array of matching data values stored in the column
|
|
* \li If OT_BOTH, it will be an array of <DataValue, RID> pairs
|
|
* @param outSize The size of the output buffer in bytes.
|
|
* @param written (out parameter) A pointer to 1 int, which will contain the
|
|
* number of bytes written to out.
|
|
* @note See PrimitiveMsg.h for the type definitions.
|
|
*/
|
|
void p_Col(NewColRequestHeader *in, NewColResultHeader *out, unsigned outSize,
|
|
unsigned *written);
|
|
|
|
boost::shared_ptr<ParsedColumnFilter> parseColumnFilter(const uint8_t *filterString,
|
|
uint32_t colWidth, uint32_t colType, uint32_t filterCount, uint32_t BOP);
|
|
void setParsedColumnFilter(boost::shared_ptr<ParsedColumnFilter>);
|
|
|
|
/** @brief The p_ColAggregate primitive processor.
|
|
*
|
|
* The p_ColAggregate primitive processor. It operates on a column block
|
|
* specified using setBlockPtr().
|
|
* @param in The buffer containing the command parameters. The buffer should begin
|
|
* with a NewColAggRequestHeader, followed by an array of RIDs to generate
|
|
* the data for (optional).
|
|
* @param out The buffer to put the result in. On return, it will contain a
|
|
* NewCollAggResultHeader.
|
|
* @note See PrimitiveMsg.h for the type definitions.
|
|
*/
|
|
// void p_ColAggregate(const NewColAggRequestHeader *in, NewColAggResultHeader *out);
|
|
|
|
void p_Dictionary(const DictInput *in, std::vector<uint8_t> *out, bool utf8,
|
|
bool skipNulls, boost::shared_ptr<DictEqualityFilter> eqFilter,
|
|
uint8_t eqOp);
|
|
|
|
inline void setLogicalBlockMode(bool b) { logicalBlockMode = b; }
|
|
|
|
|
|
|
|
static int convertToRegexp(idb_regex_t *regex, const p_DataValue *str);
|
|
inline static bool isEscapedChar(char c);
|
|
boost::shared_array<idb_regex_t> makeLikeFilter(const DictFilterElement *inputMsg, uint32_t count);
|
|
void setLikeFilter(boost::shared_array<idb_regex_t> filter) { parsedLikeFilter = filter; }
|
|
|
|
private:
|
|
PrimitiveProcessor(const PrimitiveProcessor& rhs);
|
|
PrimitiveProcessor& operator=(const PrimitiveProcessor& rhs);
|
|
|
|
int *block;
|
|
|
|
bool compare(int cmpResult, uint8_t COP, int len1, int len2) throw();
|
|
int compare(int val1, int val2, uint8_t COP, bool lastStage) throw();
|
|
void indexWalk_1(const IndexWalkHeader *in, std::vector<IndexWalkHeader *> *out) throw();
|
|
void indexWalk_2(const IndexWalkHeader *in, std::vector<IndexWalkHeader *> *out) throw();
|
|
void indexWalk_many(const IndexWalkHeader *in, std::vector<IndexWalkHeader *> *out) throw();
|
|
void grabSubTree(const IndexWalkHeader *in, std::vector<IndexWalkHeader *> *out) throw();
|
|
|
|
void nextSig(int NVALS, const PrimToken *tokens, p_DataValue *ret,
|
|
uint8_t outputFlags = 0, bool oldGetSigBehavior = false, bool skipNulls = false) throw();
|
|
bool isLike(const p_DataValue *dict, const idb_regex_t *arg) throw();
|
|
|
|
// void do_sum8(NewColAggResultHeader *out, int64_t val);
|
|
// void do_unsignedsum8(NewColAggResultHeader *out, int64_t val);
|
|
|
|
uint64_t masks[11];
|
|
int dict_OffsetIndex, currentOffsetIndex; // used by p_dictionary
|
|
int fDebugLevel;
|
|
dbbc::Stats* fStatsPtr; // pointer for pmstats
|
|
bool logicalBlockMode;
|
|
|
|
boost::shared_ptr<ParsedColumnFilter> parsedColumnFilter;
|
|
boost::shared_array<idb_regex_t> parsedLikeFilter;
|
|
|
|
friend class ::PrimTest;
|
|
};
|
|
|
|
} //namespace primitives
|
|
|
|
#endif
|
|
// vim:ts=4 sw=4:
|
|
|