You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-10-31 18:30:33 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			336 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			336 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* Copyright (C) 2014 InfiniDB, Inc.
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or
 | |
|    modify it under the terms of the GNU General Public License
 | |
|    as published by the Free Software Foundation; version 2 of
 | |
|    the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | |
|    MA 02110-1301, USA. */
 | |
| 
 | |
| /******************************************************************************
 | |
|  * $Id: primitiveprocessor.h 2035 2013-01-21 14:12:19Z rdempsey $
 | |
|  *
 | |
|  *****************************************************************************/
 | |
| 
 | |
| /** @file */
 | |
| 
 | |
| #ifndef PRIMITIVEPROCESSOR_H_
 | |
| #define PRIMITIVEPROCESSOR_H_
 | |
| 
 | |
| #include <stdexcept>
 | |
| #include <vector>
 | |
| #ifndef _MSC_VER
 | |
| #include <tr1/unordered_set>
 | |
| #else
 | |
| #include <unordered_set>
 | |
| #endif
 | |
| 
 | |
| #ifdef __linux__
 | |
| #define POSIX_REGEX
 | |
| #endif
 | |
| 
 | |
| #ifdef POSIX_REGEX
 | |
| #include <regex.h>
 | |
| #else
 | |
| #include <boost/regex.hpp>
 | |
| #endif
 | |
| #include <cstddef>
 | |
| #include <boost/shared_ptr.hpp>
 | |
| #include <boost/shared_array.hpp>
 | |
| 
 | |
| #include "primitivemsg.h"
 | |
| #include "calpontsystemcatalog.h"
 | |
| #include "stats.h"
 | |
| #include "primproc.h"
 | |
| #include "hasher.h"
 | |
| 
 | |
| class PrimTest;
 | |
| 
 | |
| namespace primitives
 | |
| {
 | |
| 
 | |
| enum ColumnFilterMode
 | |
| {
 | |
|     STANDARD,
 | |
|     TWO_ARRAYS,
 | |
|     UNORDERED_SET
 | |
| };
 | |
| 
 | |
| class pcfHasher
 | |
| {
 | |
| public:
 | |
|     inline size_t operator()(const int64_t i) const
 | |
|     {
 | |
|         return i;
 | |
|     }
 | |
| };
 | |
| 
 | |
| class pcfEqual
 | |
| {
 | |
| public:
 | |
|     inline size_t operator()(const int64_t f1, const int64_t f2) const
 | |
|     {
 | |
|         return f1 == f2;
 | |
|     }
 | |
| };
 | |
| 
 | |
| typedef std::tr1::unordered_set<int64_t, pcfHasher, pcfEqual> prestored_set_t;
 | |
| typedef std::tr1::unordered_set<std::string, utils::Hasher> DictEqualityFilter;
 | |
| 
 | |
| struct idb_regex_t
 | |
| {
 | |
| #ifdef POSIX_REGEX
 | |
|     regex_t regex;
 | |
| #else
 | |
|     boost::regex regex;
 | |
| #endif
 | |
|     bool used;
 | |
|     idb_regex_t() : used(false) { }
 | |
|     ~idb_regex_t()
 | |
|     {
 | |
| #ifdef POSIX_REGEX
 | |
| 
 | |
|         if (used)
 | |
|             regfree(®ex);
 | |
| 
 | |
| #endif
 | |
|     }
 | |
| };
 | |
| 
 | |
| struct ParsedColumnFilter
 | |
| {
 | |
|     ColumnFilterMode columnFilterMode;
 | |
|     boost::shared_array<int64_t> prestored_argVals;
 | |
|     boost::shared_array<uint8_t> prestored_cops;
 | |
|     boost::shared_array<uint8_t> prestored_rfs;
 | |
|     boost::shared_ptr<prestored_set_t> prestored_set;
 | |
|     boost::shared_array<idb_regex_t> prestored_regex;
 | |
|     uint8_t  likeOps;
 | |
| 
 | |
|     ParsedColumnFilter();
 | |
|     ~ParsedColumnFilter();
 | |
| };
 | |
| 
 | |
| //@bug 1828 These need to be public so that column operations can use it for 'like'
 | |
| struct p_DataValue
 | |
| {
 | |
|     int len;
 | |
|     const uint8_t* data;
 | |
| };
 | |
| 
 | |
| boost::shared_ptr<ParsedColumnFilter> parseColumnFilter(const uint8_t* filterString,
 | |
|         uint32_t colWidth, uint32_t colType, uint32_t filterCount, uint32_t BOP);
 | |
| 
 | |
| /** @brief This class encapsulates the primitive processing functionality of the system.
 | |
|  *
 | |
|  *  This class encapsulates the primitive processing functionality of the system.
 | |
|  */
 | |
| class PrimitiveProcessor
 | |
| {
 | |
| public:
 | |
|     PrimitiveProcessor(int debugLevel = 0);
 | |
|     virtual ~PrimitiveProcessor();
 | |
| 
 | |
|     /** @brief Sets the block to operate on
 | |
|      *
 | |
|      * The primitive processing functions operate on one block at a time.  The caller
 | |
|      * sets which block to operate on next with this function.
 | |
|      */
 | |
|     void setBlockPtr(int* data)
 | |
|     {
 | |
|         block = data;
 | |
|     }
 | |
|     void setPMStatsPtr(dbbc::Stats* p)
 | |
|     {
 | |
|         fStatsPtr = p;
 | |
|     }
 | |
| 
 | |
| 
 | |
|     /** @brief The interface to Mark's NIOS primitive processing code.
 | |
|      *
 | |
|      * The interface to Mark's NIOS primitive processing code.  Instead of reading
 | |
|      * and writing to a bus, it will read/write to buffers specified by inBuf
 | |
|      * and outBuf.  The primitives implemented this way are:
 | |
|      * - p_Col and p_ColAggregate
 | |
|      * - p_GetSignature
 | |
|      *
 | |
|      * @param inBuf (in) The buffer containing a command to execute
 | |
|      * @param inLength (in) The size of inBuf in 4-byte words
 | |
|      * @param outBuf (in) The buffer to store the output in
 | |
|      * @param outLength (in) The size of outBuf in 4-byte words
 | |
|      * @param written (out) The number of bytes written to outBuf.
 | |
|      * @note Throws logic_error if the output buffer is too small for the result.
 | |
|      */
 | |
|     void processBuffer(int* inBuf, unsigned inLength, int* outBuf, unsigned outLength,
 | |
|                        unsigned* written);
 | |
| 
 | |
|     /* Patrick */
 | |
| 
 | |
|     /** @brief The p_TokenByScan primitive processor
 | |
|      *
 | |
|      * The p_TokenByScan primitive processor.  It relies on the caller setting
 | |
|      * the block to operate on with setBlockPtr().  It assumes the continuation
 | |
|      * pointer is not used.
 | |
|      * @param t (in) The arguments to the primitive
 | |
|      * @param out (out) This must point to memory of some currently unknown max size
 | |
|      * @param outSize (in) The size of the output buffer in bytes.
 | |
|      * @note Throws logic_error if the output buffer is too small for the result.
 | |
|      */
 | |
|     void p_TokenByScan(const TokenByScanRequestHeader* t,
 | |
|                        TokenByScanResultHeader* out, unsigned outSize, bool utf8,
 | |
|                        boost::shared_ptr<DictEqualityFilter> eqFilter);
 | |
| 
 | |
|     /** @brief The p_IdxWalk primitive processor
 | |
|      *
 | |
|      * The p_IdxWalk primitive processor.  The caller must set the block to operate
 | |
|      * on with setBlockPtr().  This primitive can return intermediate results.
 | |
|      * All results returned will have an different LBID than the input.  They can
 | |
|      * also be in varying states of completion.  A result is final when
 | |
|      * Shift >= SSlen, otherwise it is intermediate and needs to be reissued with
 | |
|      * the specified LBID loaded.
 | |
|      * @note If in->NVALS > 2, new vectors may be returned in the result set, which
 | |
|      * will have to be deleted by the caller.  The test to use right now is
 | |
|      * ({element}->NVALS > 2 && {element}->State == 0).  If that condition is true,
 | |
|      * delete the vector, otherwise don't.  This kludginess is for efficiency's sake
 | |
|      * and may go away for the sake of sanity later.
 | |
|      * @note It is safe to delete any vector passed in after the call.
 | |
|      * @param out The caller should pass in an empty vector.  The results
 | |
|      * will be returned as elements of this vector.
 | |
|      */
 | |
|     void p_IdxWalk(const IndexWalkHeader* in, std::vector<IndexWalkHeader*>* out) throw();
 | |
| 
 | |
|     /** @brief The p_IdxList primitive processor.
 | |
|      *
 | |
|      * The p_IdxList primitive processor.  The caller must set the block to operate
 | |
|      * on with setBlockPtr().  This primitive can return one intermediate result
 | |
|      * for every call made.  If there is an intermediate result returned, it will
 | |
|      * be the first element, distinguished by its type field.  If the
 | |
|      * first element has a type == RID (3) , there is no intermediate result.  If
 | |
|      * the first element had a type == LLP_SUBBLK (4) or type == LLP_BLK (5),
 | |
|      * that element is the intermediate result.  Its value field will be a pointer
 | |
|      * to the next section of the list.
 | |
|      *
 | |
|      * @param rqst (in) The request header followed by NVALS IndexWalkParams
 | |
|      * @param rslt (out) The caller passes in a buffer which will be filled
 | |
|      * by the primitive on return.  It will consist of an IndexListHeader,
 | |
|      * followed by NVALS IndexListEntrys.
 | |
|      * @param mode (optional, in) 0 specifies old behavior (the last entry of a block might
 | |
|      * be a pointer).  1 specifies new behavior (the last entry should be ignored).
 | |
|      */
 | |
|     void p_IdxList(const IndexListHeader* rqst, IndexListHeader* rslt, int mode = 1);
 | |
| 
 | |
|     /** @brief The p_AggregateSignature primitive processor.
 | |
|      *
 | |
|      * The p_AggregateSignature primitive processor.  It operates on a dictionary
 | |
|      * block and assumes the continuation pointer is not used.
 | |
|      * @param in The input parameters
 | |
|      * @param out A pointer to a buffer where the result will be written.
 | |
|      * @param outSize The size of the output buffer in bytes.
 | |
|      * @param written (out parameter) A pointer to 1 int, which will contain the
 | |
|      * number of bytes written to out.
 | |
|      */
 | |
|     void p_AggregateSignature(const AggregateSignatureRequestHeader* in,
 | |
|                               AggregateSignatureResultHeader* out, unsigned outSize, unsigned* written, bool utf8);
 | |
| 
 | |
|     /** @brief The p_Col primitive processor.
 | |
|      *
 | |
|      * The p_Col primitive processor.  It operates on a column block specified using setBlockPtr().
 | |
|      * @param in The buffer containing the command parameters.
 | |
|      * 		The buffer should begin with a NewColRequestHeader structure, followed by
 | |
|      * 		an array of 'NOPS' defining the filter to apply (optional),
 | |
|      * 		followed by an array of RIDs to apply the filter to (optional).
 | |
|      * @param out The buffer that will contain the results.  On return, it will start with
 | |
|      * a NewColResultHeader, followed by the output type specified by in->OutputType.
 | |
|      * \li If OT_RID, it will be an array of RIDs
 | |
|      * \li If OT_DATAVALUE, it will be an array of matching data values stored in the column
 | |
|      * \li If OT_BOTH, it will be an array of <DataValue, RID> pairs
 | |
|      * @param outSize The size of the output buffer in bytes.
 | |
|      * @param written (out parameter) A pointer to 1 int, which will contain the
 | |
|      * number of bytes written to out.
 | |
|      * @note See PrimitiveMsg.h for the type definitions.
 | |
|      */
 | |
|     void p_Col(NewColRequestHeader* in, NewColResultHeader* out, unsigned outSize,
 | |
|                unsigned* written);
 | |
| 
 | |
|     boost::shared_ptr<ParsedColumnFilter> parseColumnFilter(const uint8_t* filterString,
 | |
|             uint32_t colWidth, uint32_t colType, uint32_t filterCount, uint32_t BOP);
 | |
|     void setParsedColumnFilter(boost::shared_ptr<ParsedColumnFilter>);
 | |
| 
 | |
|     /** @brief The p_ColAggregate primitive processor.
 | |
|      *
 | |
|      * The p_ColAggregate primitive processor.  It operates on a column block
 | |
|      * specified using setBlockPtr().
 | |
|      * @param in The buffer containing the command parameters.  The buffer should begin
 | |
|      *		with a NewColAggRequestHeader, followed by an array of RIDs to generate
 | |
|      * 		the data for (optional).
 | |
|      * @param out The buffer to put the result in.  On return, it will contain a
 | |
|      * NewCollAggResultHeader.
 | |
|      * @note See PrimitiveMsg.h for the type definitions.
 | |
|      */
 | |
| //	void p_ColAggregate(const NewColAggRequestHeader *in, NewColAggResultHeader *out);
 | |
| 
 | |
|     void p_Dictionary(const DictInput* in, std::vector<uint8_t>* out, bool utf8,
 | |
|                       bool skipNulls, boost::shared_ptr<DictEqualityFilter> eqFilter,
 | |
|                       uint8_t eqOp);
 | |
| 
 | |
|     inline void setLogicalBlockMode(bool b)
 | |
|     {
 | |
|         logicalBlockMode = b;
 | |
|     }
 | |
| 
 | |
| 
 | |
| 
 | |
|     static int convertToRegexp(idb_regex_t* regex, const p_DataValue* str);
 | |
|     inline static bool isEscapedChar(char c);
 | |
|     boost::shared_array<idb_regex_t> makeLikeFilter(const DictFilterElement* inputMsg, uint32_t count);
 | |
|     void setLikeFilter(boost::shared_array<idb_regex_t> filter)
 | |
|     {
 | |
|         parsedLikeFilter = filter;
 | |
|     }
 | |
| 
 | |
| private:
 | |
|     PrimitiveProcessor(const PrimitiveProcessor& rhs);
 | |
|     PrimitiveProcessor& operator=(const PrimitiveProcessor& rhs);
 | |
| 
 | |
|     int* block;
 | |
| 
 | |
|     bool compare(int cmpResult, uint8_t COP, int len1, int len2) throw();
 | |
|     int compare(int val1, int val2, uint8_t COP, bool lastStage) throw();
 | |
|     void indexWalk_1(const IndexWalkHeader* in, std::vector<IndexWalkHeader*>* out) throw();
 | |
|     void indexWalk_2(const IndexWalkHeader* in, std::vector<IndexWalkHeader*>* out) throw();
 | |
|     void indexWalk_many(const IndexWalkHeader* in, std::vector<IndexWalkHeader*>* out) throw();
 | |
|     void grabSubTree(const IndexWalkHeader* in, std::vector<IndexWalkHeader*>* out) throw();
 | |
| 
 | |
|     void nextSig(int NVALS, const PrimToken* tokens, p_DataValue* ret,
 | |
|                  uint8_t outputFlags = 0, bool oldGetSigBehavior = false, bool skipNulls = false) throw();
 | |
|     bool isLike(const p_DataValue* dict, const idb_regex_t* arg) throw();
 | |
| 
 | |
| //	void do_sum8(NewColAggResultHeader *out, int64_t val);
 | |
| //    void do_unsignedsum8(NewColAggResultHeader *out, int64_t val);
 | |
| 
 | |
|     uint64_t masks[11];
 | |
|     int dict_OffsetIndex, currentOffsetIndex;		// used by p_dictionary
 | |
|     int fDebugLevel;
 | |
|     dbbc::Stats* fStatsPtr; // pointer for pmstats
 | |
|     bool logicalBlockMode;
 | |
| 
 | |
|     boost::shared_ptr<ParsedColumnFilter> parsedColumnFilter;
 | |
|     boost::shared_array<idb_regex_t> parsedLikeFilter;
 | |
| 
 | |
|     friend class ::PrimTest;
 | |
| };
 | |
| 
 | |
| } //namespace primitives
 | |
| 
 | |
| #endif
 | |
| // vim:ts=4 sw=4:
 | |
| 
 |