You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-10-31 18:30:33 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			1071 lines
		
	
	
		
			33 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			1071 lines
		
	
	
		
			33 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* Copyright (C) 2014 InfiniDB, Inc.
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or
 | |
|    modify it under the terms of the GNU General Public License
 | |
|    as published by the Free Software Foundation; version 2 of
 | |
|    the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | |
|    MA 02110-1301, USA. */
 | |
| 
 | |
| /*
 | |
|  * $Id: dictionary.cpp 2122 2013-07-08 16:33:50Z bpaul $
 | |
|  */
 | |
| 
 | |
| #include <iostream>
 | |
| #include <boost/scoped_array.hpp>
 | |
| #include <boost/algorithm/string/trim.hpp>
 | |
| #include <sys/types.h>
 | |
| using namespace std;
 | |
| 
 | |
| #include "primitiveprocessor.h"
 | |
| #include "we_type.h"
 | |
| #include "messagelog.h"
 | |
| #include "messageobj.h"
 | |
| #include "exceptclasses.h"
 | |
| #include "utils_utf8.h"
 | |
| #include <sstream>
 | |
| 
 | |
| using namespace funcexp;
 | |
| using namespace logging;
 | |
| 
 | |
| const char* nullString = " ";  // Non-NULL placeholder returned by nextSig() for NULL tokens, so callers never dereference a null data pointer.
 | |
| const int nullStringLen = 0;  // Length reported together with nullString: a NULL token yields a zero-length value.
 | |
| 
 | |
| namespace
 | |
| {
 | |
| const char* signatureNotFound = joblist::CPSTRNOTFOUND.c_str();  // Marker string nextSig() returns when a requested signature has an out-of-range length.
 | |
| }
 | |
| 
 | |
| namespace primitives
 | |
| {
 | |
| 
 | |
| inline bool PrimitiveProcessor::compare(int cmp, uint8_t COP, int len1, int len2) throw()
 | |
| {
 | |
| 
 | |
|     switch (COP)
 | |
|     {
 | |
|         case COMPARE_NIL:
 | |
|             return false;
 | |
| 
 | |
|         case COMPARE_LT:
 | |
|             return (cmp < 0 || (cmp == 0 && len1 < len2));
 | |
| 
 | |
|         case COMPARE_EQ:
 | |
|             return (cmp == 0 && len1 == len2 ? true : false);
 | |
| 
 | |
|         case COMPARE_LE:
 | |
|             return (cmp < 0  || (cmp == 0 && len1 <= len2));
 | |
| 
 | |
|         case COMPARE_GT:
 | |
|             return (cmp > 0 || (cmp == 0 && len1 > len2));
 | |
| 
 | |
|         case COMPARE_NE:
 | |
|             return (cmp != 0 || len1 != len2 ? true : false);
 | |
| 
 | |
|         case COMPARE_GE:
 | |
|             return (cmp > 0 || (cmp == 0 && len1 >= len2));
 | |
| 
 | |
|         case COMPARE_LIKE:
 | |
|             return cmp;							// is done elsewhere; shouldn't get here.  Exception?
 | |
| 
 | |
|         case COMPARE_NOT:
 | |
|             return false;  						// throw an exception here?
 | |
| 
 | |
|         default:
 | |
|             MessageLog logger(LoggingID(28));
 | |
|             logging::Message::Args colWidth;
 | |
|             Message msg(34);
 | |
| 
 | |
|             colWidth.add(COP);
 | |
|             colWidth.add("compare");
 | |
|             msg.format(colWidth);
 | |
|             logger.logErrorMessage(msg);
 | |
|             return false;						// throw an exception here?
 | |
|     }
 | |
| }
 | |
| 
 | |
| /*
 | |
| Notes:
 | |
| 	- assumes no continuation pointer
 | |
| */
 | |
| 
 | |
| void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h,
 | |
|                                        TokenByScanResultHeader* ret, unsigned outSize, bool utf8,
 | |
|                                        boost::shared_ptr<DictEqualityFilter> eqFilter)
 | |
| {
 | |
|     const DataValue* args;
 | |
|     const uint8_t* niceBlock;		// block cast to a byte-indexed type
 | |
|     const uint8_t* niceInput;		// h cast to a byte-indexed type
 | |
|     const uint16_t* offsets;
 | |
|     int offsetIndex, argIndex, argsOffset;
 | |
|     bool cmpResult = false;
 | |
|     int tmp, i, err;
 | |
| 
 | |
|     const char* sig;
 | |
|     uint16_t siglen;
 | |
| 
 | |
|     PrimToken* retTokens;
 | |
|     DataValue* retDataValues;
 | |
|     int rdvOffset;
 | |
|     uint8_t* niceRet;			// ret cast to a byte-indexed type
 | |
| 
 | |
|     boost::scoped_array<idb_regex_t> regex;
 | |
| 
 | |
|     // set up pointers to fields within each structure
 | |
| 
 | |
|     // either retTokens or retDataValues will be used but not both.
 | |
|     niceRet = reinterpret_cast<uint8_t*>(ret);
 | |
|     rdvOffset = sizeof(TokenByScanResultHeader);
 | |
| 
 | |
|     retTokens = reinterpret_cast<PrimToken*>(&niceRet[rdvOffset]);
 | |
|     retDataValues = reinterpret_cast<DataValue*>(&niceRet[rdvOffset]);
 | |
|     memcpy(ret, h, sizeof(PrimitiveHeader) + sizeof(ISMPacketHeader));
 | |
|     ret->NVALS = 0;
 | |
|     ret->NBYTES = sizeof(TokenByScanResultHeader);
 | |
|     ret->ism.Command = DICT_SCAN_COMPARE_RESULTS;
 | |
| 
 | |
|     //...Initialize I/O counts
 | |
|     ret->CacheIO    = 0;
 | |
|     ret->PhysicalIO = 0;
 | |
| 
 | |
|     niceBlock = reinterpret_cast<const uint8_t*>(block);
 | |
|     offsets = reinterpret_cast<const uint16_t*>(&niceBlock[10]);
 | |
|     niceInput = reinterpret_cast<const uint8_t*>(h);
 | |
| 
 | |
|     // if LIKE is an operator, compile regexp's in advance.
 | |
|     if ((h->NVALS > 0 && h->COP1 & COMPARE_LIKE) ||
 | |
|             (h->NVALS == 2 && h->COP2 & COMPARE_LIKE))
 | |
|     {
 | |
|         regex.reset(new idb_regex_t[h->NVALS]);
 | |
| 
 | |
|         for (i = 0, argsOffset = sizeof(TokenByScanRequestHeader); i < h->NVALS; i++)
 | |
|         {
 | |
|             p_DataValue pdvTmp;
 | |
| 
 | |
|             args = reinterpret_cast<const DataValue*>(&niceInput[argsOffset]);
 | |
|             pdvTmp.len = args->len;
 | |
|             pdvTmp.data = (const uint8_t*) args->data;
 | |
|             err = convertToRegexp(®ex[i], &pdvTmp);
 | |
| 
 | |
|             if (err != 0)
 | |
|             {
 | |
|                 MessageLog logger(LoggingID(28));
 | |
|                 Message msg(37);
 | |
|                 logger.logErrorMessage(msg);
 | |
| 
 | |
|                 return;
 | |
|             }
 | |
| 
 | |
|             argsOffset += sizeof(uint16_t) + args->len;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     for (offsetIndex = 1; offsets[offsetIndex] != 0xffff; offsetIndex++)
 | |
|     {
 | |
| 
 | |
|         siglen = offsets[offsetIndex - 1] - offsets[offsetIndex];
 | |
|         sig = reinterpret_cast<const char*>(&niceBlock[offsets[offsetIndex]]);
 | |
|         argsOffset = sizeof(TokenByScanRequestHeader);
 | |
|         argIndex = 0;
 | |
|         args = reinterpret_cast<const DataValue*>(&niceInput[argsOffset]);
 | |
| 
 | |
|         string sig_utf8;
 | |
|         string arg_utf8;
 | |
| 
 | |
|         if (eqFilter)
 | |
|         {
 | |
|             // MCOL-1246 Trim whitespace before match
 | |
|             string strData(sig, siglen);
 | |
|             boost::trim_right_if(strData, boost::is_any_of(" "));
 | |
|             bool gotIt = eqFilter->find(strData) != eqFilter->end();
 | |
| 
 | |
|             if ((h->COP1 == COMPARE_EQ && gotIt) || (h->COP1 == COMPARE_NE &&
 | |
|                     !gotIt))
 | |
|                 goto store;
 | |
| 
 | |
|             goto no_store;
 | |
|         }
 | |
| 
 | |
|         // BUG 5110: If it is utf, we need to create utf strings to compare
 | |
|         if (utf8)
 | |
|         {
 | |
|             sig_utf8 = string(sig, siglen);
 | |
|             arg_utf8 = string(args->data, args->len);
 | |
|         }
 | |
| 
 | |
|         switch (h->NVALS)
 | |
|         {
 | |
|             case 1:
 | |
|             {
 | |
|                 if (h->COP1 & COMPARE_LIKE)
 | |
|                 {
 | |
|                     p_DataValue dv;
 | |
| 
 | |
|                     dv.len = siglen;
 | |
|                     dv.data = (uint8_t*) sig;
 | |
|                     cmpResult = isLike(&dv, ®ex[argIndex]);
 | |
| 
 | |
|                     if (h->COP1 & COMPARE_NOT)
 | |
|                         cmpResult = !cmpResult;
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     if (utf8)
 | |
|                     {
 | |
|                         tmp = utf8::idb_strcoll(sig_utf8.c_str(), arg_utf8.c_str());
 | |
|                         cmpResult = compare(tmp, h->COP1, siglen, args->len);
 | |
|                     }
 | |
|                     else
 | |
|                     {
 | |
|                         tmp = strncmp(sig, args->data, std::min(siglen, args->len));
 | |
|                         cmpResult = compare(tmp, h->COP1, siglen, args->len);
 | |
|                     }
 | |
|                 }
 | |
| 
 | |
|                 if (cmpResult)
 | |
|                     goto store;
 | |
| 
 | |
|                 goto no_store;
 | |
|             }
 | |
| 
 | |
|             case 2:
 | |
|             {
 | |
|                 if (h->COP1 & COMPARE_LIKE)
 | |
|                 {
 | |
|                     p_DataValue dv;
 | |
| 
 | |
|                     dv.len = siglen;
 | |
|                     dv.data = (uint8_t*) sig;
 | |
|                     cmpResult = isLike(&dv, ®ex[argIndex]);
 | |
| 
 | |
|                     if (h->COP1 & COMPARE_NOT)
 | |
|                         cmpResult = !cmpResult;
 | |
|                 }
 | |
| 
 | |
|                 else
 | |
|                 {
 | |
|                     if (utf8)
 | |
|                     {
 | |
|                         tmp = utf8::idb_strcoll(sig_utf8.c_str(), arg_utf8.c_str());
 | |
|                         cmpResult = compare(tmp, h->COP1, siglen, args->len);
 | |
|                     }
 | |
|                     else
 | |
|                     {
 | |
|                         tmp = strncmp(sig, args->data, std::min(siglen, args->len));
 | |
|                         cmpResult = compare(tmp, h->COP1, siglen, args->len);
 | |
|                     }
 | |
|                 }
 | |
| 
 | |
|                 if (!cmpResult && h->BOP == BOP_AND)
 | |
|                     goto no_store;
 | |
| 
 | |
|                 if (cmpResult && h->BOP == BOP_OR)
 | |
|                     goto store;
 | |
| 
 | |
|                 argsOffset += sizeof(uint16_t) + args->len;
 | |
|                 argIndex++;
 | |
|                 args = (DataValue*) &niceInput[argsOffset];
 | |
| 
 | |
|                 if (h->COP2 & COMPARE_LIKE)
 | |
|                 {
 | |
|                     p_DataValue dv;
 | |
| 
 | |
|                     dv.len = siglen;
 | |
|                     dv.data = (uint8_t*) sig;
 | |
|                     cmpResult = isLike(&dv, ®ex[argIndex]);
 | |
| 
 | |
|                     if (h->COP2 & COMPARE_NOT)
 | |
|                         cmpResult = !cmpResult;
 | |
|                 }
 | |
| 
 | |
|                 else
 | |
|                 {
 | |
|                     if (utf8)
 | |
|                     {
 | |
|                         arg_utf8 = string(args->data, args->len);
 | |
|                         tmp = utf8::idb_strcoll(sig_utf8.c_str(), arg_utf8.c_str());
 | |
|                         cmpResult = compare(tmp, h->COP2, siglen, args->len);
 | |
|                     }
 | |
|                     else
 | |
|                     {
 | |
|                         tmp = strncmp(sig, args->data, std::min(siglen, args->len));
 | |
|                         cmpResult = compare(tmp, h->COP2, siglen, args->len);
 | |
|                     }
 | |
|                 }
 | |
| 
 | |
|                 if (cmpResult)
 | |
|                     goto store;
 | |
| 
 | |
|                 goto no_store;
 | |
|             }
 | |
| 
 | |
|             default:
 | |
|             {
 | |
|                 for (i = 0, cmpResult = true; i < h->NVALS; i++)
 | |
|                 {
 | |
|                     if (h->COP1 & COMPARE_LIKE)
 | |
|                     {
 | |
|                         p_DataValue dv;
 | |
| 
 | |
|                         dv.len = siglen;
 | |
|                         dv.data = (uint8_t*) sig;
 | |
|                         cmpResult = isLike(&dv, ®ex[argIndex]);
 | |
| 
 | |
|                         if (h->COP1 & COMPARE_NOT)
 | |
|                             cmpResult = !cmpResult;
 | |
|                     }
 | |
| 
 | |
|                     else
 | |
|                     {
 | |
|                         if (utf8)
 | |
|                         {
 | |
|                             tmp = utf8::idb_strcoll(sig_utf8.c_str(), arg_utf8.c_str());
 | |
|                             cmpResult = compare(tmp, h->COP2, siglen, args->len);
 | |
|                         }
 | |
|                         else
 | |
|                         {
 | |
|                             tmp = strncmp(sig, args->data, std::min(siglen, args->len));
 | |
|                             cmpResult = compare(tmp, h->COP1, siglen, args->len);
 | |
|                         }
 | |
|                     }
 | |
| 
 | |
|                     if (!cmpResult && h->BOP == BOP_AND)
 | |
|                         goto no_store;
 | |
| 
 | |
|                     if (cmpResult && h->BOP == BOP_OR)
 | |
|                         goto store;
 | |
| 
 | |
|                     argsOffset += sizeof(uint16_t) + args->len;
 | |
|                     argIndex++;
 | |
|                     args = (DataValue*) &niceInput[argsOffset];
 | |
| 
 | |
|                     if ( utf8)
 | |
|                     {
 | |
|                         arg_utf8 = string(args->data, args->len);
 | |
|                     }
 | |
|                 }
 | |
| 
 | |
|                 if (i == h->NVALS && cmpResult)
 | |
|                     goto store;
 | |
|                 else
 | |
|                     goto no_store;
 | |
|             }
 | |
|         }
 | |
| 
 | |
| store:
 | |
| 
 | |
|         if (h->OutputType == OT_DATAVALUE)
 | |
|         {
 | |
|             if ((ret->NBYTES + sizeof(DataValue) + siglen) > outSize)
 | |
|             {
 | |
|                 MessageLog logger(LoggingID(28));
 | |
|                 logging::Message::Args marker;
 | |
|                 Message msg(35);
 | |
| 
 | |
|                 marker.add(8);
 | |
|                 msg.format(marker);
 | |
|                 logger.logErrorMessage(msg);
 | |
| 
 | |
|                 throw logging::DictionaryBufferOverflow();
 | |
|             }
 | |
| 
 | |
|             retDataValues->len = siglen;
 | |
|             memcpy(retDataValues->data, sig, siglen);
 | |
|             rdvOffset += sizeof(DataValue) + siglen;
 | |
|             retDataValues = (DataValue*) &niceRet[rdvOffset];
 | |
|             ret->NVALS++;
 | |
|             ret->NBYTES += sizeof(DataValue) + siglen;
 | |
|         }
 | |
|         else if (h->OutputType == OT_TOKEN)
 | |
|         {
 | |
|             if ((ret->NBYTES + sizeof(PrimToken)) > outSize)
 | |
|             {
 | |
|                 MessageLog logger(LoggingID(28));
 | |
|                 logging::Message::Args marker;
 | |
|                 Message msg(35);
 | |
| 
 | |
|                 marker.add(9);
 | |
|                 msg.format(marker);
 | |
|                 logger.logErrorMessage(msg);
 | |
| 
 | |
|                 throw logging::DictionaryBufferOverflow();
 | |
|             }
 | |
| 
 | |
|             retTokens[ret->NVALS].LBID = h->LBID;
 | |
|             retTokens[ret->NVALS].offset = offsetIndex;  // need index rather than the block offset... rp 12/19/06
 | |
|             retTokens[ret->NVALS].len = args->len;
 | |
|             ret->NVALS++;
 | |
|             ret->NBYTES += sizeof(PrimToken);
 | |
|         }
 | |
|         /*
 | |
|          * XXXPAT: HACK!  Ron requested a special case where the input string
 | |
|          * that matched and the token of the matched string were returned.
 | |
|          * It will not be used in cases where there are multiple input strings.
 | |
|          * We need to rethink the requirements for this primitive after Dec 15.
 | |
|          */
 | |
|         else if (h->OutputType == OT_BOTH)
 | |
|         {
 | |
|             if (ret->NBYTES + sizeof(PrimToken) + sizeof(DataValue) + args->len > outSize)
 | |
|             {
 | |
|                 MessageLog logger(LoggingID(28));
 | |
|                 logging::Message::Args marker;
 | |
|                 Message msg(35);
 | |
| 
 | |
|                 marker.add(10);
 | |
|                 msg.format(marker);
 | |
|                 logger.logErrorMessage(msg);
 | |
| 
 | |
|                 throw logging::DictionaryBufferOverflow();
 | |
|             }
 | |
| 
 | |
|             retDataValues->len = args->len;
 | |
|             memcpy(retDataValues->data, args->data, args->len);
 | |
|             rdvOffset += sizeof(DataValue) + args->len;
 | |
|             retTokens = reinterpret_cast<PrimToken*>(&niceRet[rdvOffset]);
 | |
|             retTokens->LBID = h->LBID;
 | |
|             retTokens->offset = offsetIndex;  // need index rather than the block offset... rp 12/19/06
 | |
|             retTokens->len = args->len;
 | |
|             rdvOffset += sizeof(PrimToken);
 | |
|             retDataValues = reinterpret_cast<DataValue*>(&niceRet[rdvOffset]);
 | |
|             ret->NBYTES += sizeof(PrimToken) + sizeof(DataValue) + args->len;
 | |
|             ret->NVALS++;
 | |
|         }
 | |
| 
 | |
| no_store:
 | |
|         ;			//this is intentional
 | |
|     }
 | |
| 
 | |
|     return;
 | |
| }
 | |
| 
 | |
| void PrimitiveProcessor::nextSig(int NVALS,
 | |
|                                  const PrimToken* tokens,
 | |
|                                  p_DataValue* ret,
 | |
|                                  uint8_t outputFlags,
 | |
|                                  bool oldGetSigBehavior, bool skipNulls) throw()
 | |
| {
 | |
|     const uint8_t* niceBlock = reinterpret_cast<const uint8_t*>(block);
 | |
|     const uint16_t* offsets
 | |
|         = reinterpret_cast<const uint16_t*>(&niceBlock[10]);
 | |
| 
 | |
|     if (NVALS == 0)
 | |
|     {
 | |
|         if (offsets[dict_OffsetIndex + 1] == 0xffff)
 | |
|         {
 | |
|             ret->len = -1;
 | |
|             return;
 | |
|         }
 | |
| 
 | |
|         ret->len = offsets[dict_OffsetIndex] - offsets[dict_OffsetIndex + 1];
 | |
|         ret->data = &niceBlock[offsets[dict_OffsetIndex + 1]];
 | |
| 
 | |
|         if (outputFlags & OT_TOKEN)
 | |
|             currentOffsetIndex = dict_OffsetIndex + 1;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
| 
 | |
| again:
 | |
| 
 | |
|         if (dict_OffsetIndex >= NVALS)
 | |
|         {
 | |
|             ret->len = -1;
 | |
|             return;
 | |
|         }
 | |
| 
 | |
|         if (oldGetSigBehavior)
 | |
|         {
 | |
| 
 | |
|             const OldGetSigParams* oldParams =
 | |
|                 reinterpret_cast<const OldGetSigParams*>(tokens);
 | |
| 
 | |
|             if (oldParams[dict_OffsetIndex].rid & 0x8000000000000000LL)
 | |
|             {
 | |
|                 if (skipNulls)
 | |
|                 {
 | |
|                     /* Bug 3321.  For some cases the NULL token should be skipped.  The
 | |
|                      * isnull filter is handled by token columncommand or by the F & E
 | |
|                      * framework.  This primitive should only process nulls
 | |
|                      * when it's for projection.
 | |
|                      */
 | |
|                     dict_OffsetIndex++;
 | |
|                     goto again;
 | |
|                 }
 | |
| 
 | |
|                 ret->len = nullStringLen;
 | |
|                 ret->data = (const uint8_t*) nullString;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 ret->len = offsets[oldParams[dict_OffsetIndex].offsetIndex - 1] -
 | |
|                            offsets[oldParams[dict_OffsetIndex].offsetIndex];
 | |
|                 //Whoa! apparently we have come across a missing signature! That is, the requested ordinal
 | |
|                 //  is larger than the number of signatures in this block. Return a "special" string so that
 | |
|                 //  the query keeps going, but that can be recognized as an internal error upon inspection.
 | |
|                 //@Bug 2534. Change the length check to 8000
 | |
| 
 | |
|                 // MCOL-267:
 | |
|                 // With BLOB support we have had to increase this to 8176
 | |
|                 // because a BLOB can take 8176 bytes of a dictionary block
 | |
|                 // instead of the fixed 8000 with CHAR/VARCHAR
 | |
|                 if (ret->len < 0 || ret->len > 8176)
 | |
|                 {
 | |
|                     ret->data = reinterpret_cast<const uint8_t*>(signatureNotFound);
 | |
|                     ret->len = strlen(reinterpret_cast<const char*>(ret->data));
 | |
|                 }
 | |
|                 else
 | |
|                     ret->data = &niceBlock[offsets[oldParams[dict_OffsetIndex].offsetIndex]];
 | |
|             }
 | |
| 
 | |
| // 			idbassert(ret->len >= 0);
 | |
|             currentOffsetIndex = oldParams[dict_OffsetIndex].offsetIndex;
 | |
|             dict_OffsetIndex++;
 | |
|             return;
 | |
|         }
 | |
| 
 | |
|         /* XXXPAT: Need to check for the NULL token here */
 | |
|         ret->len = tokens[dict_OffsetIndex].len;
 | |
|         ret->data = &niceBlock[tokens[dict_OffsetIndex].offset];
 | |
| 
 | |
|         if (outputFlags & OT_TOKEN)
 | |
|         {
 | |
|             //offsets = reinterpret_cast<const uint16_t *>(&niceBlock[10]);
 | |
|             for (currentOffsetIndex = 1; offsets[currentOffsetIndex] != 0xffff; currentOffsetIndex++)
 | |
|                 if (tokens[dict_OffsetIndex].offset == offsets[currentOffsetIndex])
 | |
|                     break;
 | |
| 
 | |
|             if (offsets[currentOffsetIndex] == 0xffff)
 | |
|             {
 | |
|                 MessageLog logger(LoggingID(28));
 | |
|                 logging::Message::Args offset;
 | |
|                 Message msg(38);
 | |
| 
 | |
|                 offset.add(tokens[dict_OffsetIndex].offset);
 | |
|                 msg.format(offset);
 | |
|                 logger.logErrorMessage(msg);
 | |
| 
 | |
|                 currentOffsetIndex = -1;
 | |
|                 dict_OffsetIndex++;
 | |
|                 return;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     dict_OffsetIndex++;
 | |
| }
 | |
| 
 | |
| void PrimitiveProcessor::p_AggregateSignature(const AggregateSignatureRequestHeader* in,
 | |
|         AggregateSignatureResultHeader* out, unsigned outSize, unsigned* written, bool utf8)
 | |
| {
 | |
| 
 | |
|     uint8_t* niceOutput;		// h cast to a byte-indexed type
 | |
|     int cmp;
 | |
|     char cMin[BLOCK_SIZE], cMax[BLOCK_SIZE];
 | |
|     int cMinLen, cMaxLen;
 | |
|     p_DataValue sigptr;
 | |
| 
 | |
|     DataValue* min;
 | |
|     DataValue* max;
 | |
| 
 | |
|     memcpy(out, in, sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader));
 | |
|     out->ism.Command = DICT_AGGREGATE_RESULTS;
 | |
|     niceOutput = reinterpret_cast<uint8_t*>(out);
 | |
| 
 | |
|     // The first sig is the min and the max.
 | |
|     out->Count = 0;
 | |
|     dict_OffsetIndex = 0;
 | |
|     nextSig(in->NVALS, in->tokens, &sigptr);
 | |
| 
 | |
|     if (sigptr.len == -1)
 | |
|         return;
 | |
| 
 | |
|     out->Count++;
 | |
|     memcpy(cMin, sigptr.data, sigptr.len);
 | |
|     memcpy(cMax, sigptr.data, sigptr.len);
 | |
|     cMinLen = cMaxLen = sigptr.len;
 | |
| 
 | |
|     for (nextSig(in->NVALS, in->tokens, &sigptr); sigptr.len != -1;
 | |
|             nextSig(in->NVALS, in->tokens, &sigptr), out->Count++)
 | |
|     {
 | |
|         string sig_utf8;
 | |
| 
 | |
|         if (utf8)
 | |
|         {
 | |
|             string cMin_utf8(cMin, cMinLen);
 | |
|             string tmpString((char*)sigptr.data, sigptr.len);
 | |
|             sig_utf8 = tmpString;
 | |
|             cmp = utf8::idb_strcoll(cMin_utf8.c_str(), sig_utf8.c_str());
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             cmp = strncmp(cMin, (char*)sigptr.data, std::min(cMinLen, sigptr.len));
 | |
|         }
 | |
| 
 | |
|         if (cmp > 0)
 | |
|         {
 | |
|             memcpy(cMin, sigptr.data, sigptr.len);
 | |
|             cMinLen = sigptr.len;
 | |
|         }
 | |
| 
 | |
|         if (utf8)
 | |
|         {
 | |
|             string cMax_utf8(cMax, cMaxLen);
 | |
|             cmp = utf8::idb_strcoll(cMax_utf8.c_str(), sig_utf8.c_str());
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             cmp = strncmp(cMax, (char*)sigptr.data, std::min(cMaxLen, sigptr.len));
 | |
|         }
 | |
| 
 | |
|         if (cmp < 0)
 | |
|         {
 | |
|             memcpy(cMax, sigptr.data, sigptr.len);
 | |
|             cMaxLen = sigptr.len;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     //we now have the results, stuff them into the output buffer
 | |
| #ifdef PRIM_DEBUG
 | |
|     unsigned size = sizeof(AggregateSignatureResultHeader) + cMaxLen + cMinLen
 | |
|                     + sizeof(uint16_t) * 2;
 | |
| 
 | |
|     if (outSize < size)
 | |
|     {
 | |
|         MessageLog logger(LoggingID(28));
 | |
|         logging::Message::Args marker;
 | |
|         Message msg(35);
 | |
| 
 | |
|         marker.add(11);
 | |
|         msg.format(marker);
 | |
|         logger.logErrorMessage(msg);
 | |
| 
 | |
|         throw length_error("PrimitiveProcessor::p_AggregateSignature(): output buffer is too small");
 | |
|     }
 | |
| 
 | |
| #endif
 | |
| 
 | |
|     min = reinterpret_cast<DataValue*>
 | |
|           (&niceOutput[sizeof(AggregateSignatureResultHeader)]);
 | |
|     max = reinterpret_cast<DataValue*>
 | |
|           (&niceOutput[sizeof(AggregateSignatureResultHeader) + cMinLen + sizeof(uint16_t)]);
 | |
|     min->len = cMinLen;
 | |
|     max->len = cMaxLen;
 | |
|     memcpy(min->data, cMin, cMinLen);
 | |
|     memcpy(max->data, cMax, cMaxLen);
 | |
|     *written = sizeof(AggregateSignatureResultHeader) + cMaxLen + cMinLen
 | |
|                + sizeof(uint16_t) * 2;
 | |
| }
 | |
| 
 | |
| const char backslash = '\\';  // SQL LIKE escape character; also the regex escape emitted by convertToRegexp()
 | |
| 
 | |
| inline bool PrimitiveProcessor::isEscapedChar(char c)
 | |
| {
 | |
|     return ('%' == c || '_' == c);
 | |
| }
 | |
| 
 | |
| //FIXME: copy/pasted to dataconvert.h: refactor
 | |
| int PrimitiveProcessor::convertToRegexp(idb_regex_t* regex, const p_DataValue* str)
 | |
| {
 | |
|     //In the worst case, every char is quadrupled, plus some leading/trailing cruft...
 | |
|     char* cBuf = (char*)alloca(((4 * str->len) + 3) * sizeof(char));
 | |
|     char c;
 | |
|     int i, cBufIdx = 0;
 | |
|     // translate to regexp symbols
 | |
|     cBuf[cBufIdx++] = '^';  // implicit leading anchor
 | |
| 
 | |
|     for (i = 0; i < str->len; i++)
 | |
|     {
 | |
|         c = (char) str->data[i];
 | |
| 
 | |
|         switch (c)
 | |
|         {
 | |
| 
 | |
|             // chars to substitute
 | |
|             case '%':
 | |
|                 cBuf[cBufIdx++] = '.';
 | |
|                 cBuf[cBufIdx++] = '*';
 | |
|                 break;
 | |
| 
 | |
|             case '_':
 | |
|                 cBuf[cBufIdx++] = '.';
 | |
|                 break;
 | |
| 
 | |
|             // escape the chars that are special in regexp's but not in SQL
 | |
|             // default special characters in perl: .[{}()\*+?|^$
 | |
|             case '.':
 | |
|             case '*':
 | |
|             case '^':
 | |
|             case '$':
 | |
|             case '?':
 | |
|             case '+':
 | |
|             case '|':
 | |
|             case '[':
 | |
|             case ']':
 | |
|             case '{':
 | |
|             case '}':
 | |
|             case '(':
 | |
|             case ')':
 | |
|                 cBuf[cBufIdx++] = backslash;
 | |
|                 cBuf[cBufIdx++] = c;
 | |
|                 break;
 | |
| 
 | |
|             case backslash:  //this is the sql escape char
 | |
|                 if ( i + 1 < str->len)
 | |
|                 {
 | |
|                     if (isEscapedChar(str->data[i + 1]))
 | |
|                     {
 | |
|                         cBuf[cBufIdx++] = str->data[++i];
 | |
|                         break;
 | |
|                     }
 | |
|                     else if (backslash == str->data[i + 1])
 | |
|                     {
 | |
|                         cBuf[cBufIdx++] = c;
 | |
|                         cBuf[cBufIdx++] = str->data[++i];
 | |
|                         break;
 | |
|                     }
 | |
| 
 | |
|                 }  //single slash
 | |
| 
 | |
|                 cBuf[cBufIdx++] = backslash;
 | |
|                 cBuf[cBufIdx++] = c;
 | |
|                 break;
 | |
| 
 | |
|             default:
 | |
|                 cBuf[cBufIdx++] = c;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     cBuf[cBufIdx++] = '$';  // implicit trailing anchor
 | |
|     cBuf[cBufIdx++] = '\0';
 | |
| 
 | |
| #ifdef VERBOSE
 | |
|     cerr << "regexified string is " << cBuf << endl;
 | |
| #endif
 | |
| 
 | |
| #ifdef POSIX_REGEX
 | |
|     regcomp(®ex->regex, cBuf, REG_NOSUB | REG_EXTENDED);
 | |
| #else
 | |
|     regex->regex = cBuf;
 | |
| #endif
 | |
|     regex->used = true;
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| bool PrimitiveProcessor::isLike(const p_DataValue* dict, const idb_regex_t* regex) throw()
 | |
| {
 | |
| #ifdef POSIX_REGEX
 | |
|     char cBuf[dict->len + 1];
 | |
|     memcpy(cBuf, dict->data, dict->len);
 | |
|     cBuf[dict->len] = '\0';
 | |
| 
 | |
|     return (regexec(®ex->regex, cBuf, 0, NULL, 0) == 0);
 | |
| #else
 | |
|     /* Note, the passed-in pointers are effectively begin() and end() iterators */
 | |
|     return regex_match(dict->data, dict->data + dict->len, regex->regex);
 | |
| #endif
 | |
| }
 | |
| 
 | |
| boost::shared_array<idb_regex_t>
 | |
| PrimitiveProcessor::makeLikeFilter (const DictFilterElement* filterString, uint32_t count)
 | |
| {
 | |
|     boost::shared_array<idb_regex_t> ret;
 | |
|     uint32_t filterIndex, filterOffset;
 | |
|     uint8_t* in8 = (uint8_t*) filterString;
 | |
|     const DictFilterElement* filter;
 | |
|     p_DataValue filterptr = {0, NULL};
 | |
| 
 | |
|     for (filterIndex = 0, filterOffset = 0; filterIndex < count; filterIndex++)
 | |
|     {
 | |
|         filter = reinterpret_cast<const DictFilterElement*>(&in8[filterOffset]);
 | |
| 
 | |
|         if (filter->COP & COMPARE_LIKE)
 | |
|         {
 | |
|             if (!ret)
 | |
|                 ret.reset(new idb_regex_t[count]);
 | |
| 
 | |
|             filterptr.len = filter->len;
 | |
|             filterptr.data = filter->data;
 | |
|             convertToRegexp(&ret[filterIndex], &filterptr);
 | |
|         }
 | |
| 
 | |
|         filterOffset += sizeof(DictFilterElement) + filter->len;
 | |
|     }
 | |
| 
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector<uint8_t>* out, bool utf8,
 | |
|                                       bool skipNulls, boost::shared_ptr<DictEqualityFilter> eqFilter, uint8_t eqOp)
 | |
| {
 | |
|     PrimToken* outToken;
 | |
|     const DictFilterElement* filter = 0;
 | |
|     const uint8_t* in8;
 | |
|     DataValue* outValue;
 | |
|     p_DataValue min = {0, NULL}, max = {0, NULL}, sigptr = {0, NULL};
 | |
|     int tmp, filterIndex, filterOffset;
 | |
|     uint16_t aggCount;
 | |
|     bool cmpResult;
 | |
|     DictOutput header;
 | |
| 
 | |
|     // default size of the ouput to something sufficiently large to prevent
 | |
|     // excessive reallocation and copy when resizing
 | |
|     const unsigned DEF_OUTSIZE = 16 * 1024;
 | |
|     // use this factor to scale out size of future resize calls
 | |
|     const int SCALE_FACTOR = 2;
 | |
|     out->resize(DEF_OUTSIZE);
 | |
| 
 | |
|     in8 = reinterpret_cast<const uint8_t*>(in);
 | |
| 
 | |
|     memcpy(&header, in, sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader));
 | |
|     header.ism.Command = DICT_RESULTS;
 | |
|     header.NVALS = 0;
 | |
|     header.LBID = in->LBID;
 | |
|     dict_OffsetIndex = 0;
 | |
|     filterIndex = 0;
 | |
|     aggCount = 0;
 | |
|     min.len = 0;
 | |
|     max.len = 0;
 | |
| 
 | |
|     //...Initialize I/O counts
 | |
|     header.CacheIO    = 0;
 | |
|     header.PhysicalIO = 0;
 | |
| 
 | |
|     header.NBYTES = sizeof(DictOutput);
 | |
| 
 | |
|     for (nextSig(in->NVALS, in->tokens, &sigptr, in->OutputType,
 | |
|                  (in->InputFlags ? true : false), skipNulls);
 | |
|             sigptr.len != -1;
 | |
|             nextSig(in->NVALS, in->tokens, &sigptr, in->OutputType,
 | |
|                     (in->InputFlags ? true : false), skipNulls))
 | |
|     {
 | |
| 
 | |
|         string sig_utf8;
 | |
| 
 | |
|         if (utf8)
 | |
|         {
 | |
|             string tmpString((char*)sigptr.data, sigptr.len);
 | |
|             sig_utf8 = tmpString;
 | |
|         }
 | |
| 
 | |
|         // do aggregate processing
 | |
|         if (in->OutputType & OT_AGGREGATE)
 | |
|         {
 | |
|             // len == 0 indicates this is the first pass
 | |
|             if (max.len != 0)
 | |
|             {
 | |
|                 if (utf8 )
 | |
|                 {
 | |
|                     string max_utf8((char*)max.data, max.len);
 | |
|                     tmp = utf8::idb_strcoll(sig_utf8.c_str(), max_utf8.c_str());
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     tmp = strncmp((char*)sigptr.data, (char*)max.data, std::min(sigptr.len, max.len));
 | |
|                 }
 | |
| 
 | |
|                 if (tmp > 0)
 | |
|                     max = sigptr;
 | |
|             }
 | |
|             else
 | |
|                 max = sigptr;
 | |
| 
 | |
|             if (min.len != 0)
 | |
|             {
 | |
|                 if (utf8)
 | |
|                 {
 | |
|                     string min_utf8((char*)min.data, min.len);
 | |
|                     tmp = utf8::idb_strcoll(sig_utf8.c_str(), min_utf8.c_str());
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     tmp = strncmp((char*)sigptr.data, (char*)min.data, std::min(sigptr.len, min.len));
 | |
|                 }
 | |
| 
 | |
|                 if (tmp < 0)
 | |
|                     min = sigptr;
 | |
|             }
 | |
|             else
 | |
|                 min = sigptr;
 | |
| 
 | |
|             aggCount++;
 | |
|         }
 | |
| 
 | |
|         // filter processing
 | |
|         if (in->InputFlags == 1)
 | |
|             filterOffset = sizeof(DictInput) + (in->NVALS * sizeof(OldGetSigParams));
 | |
|         else
 | |
|             filterOffset = sizeof(DictInput) + (in->NVALS * sizeof(PrimToken));
 | |
| 
 | |
|         if (eqFilter)
 | |
|         {
 | |
|             // MCOL-1246 Trim whitespace before match
 | |
|             string strData((char*)sigptr.data, sigptr.len);
 | |
|             boost::trim_right_if(strData, boost::is_any_of(" "));
 | |
|             bool gotIt = eqFilter->find(strData) != eqFilter->end();
 | |
| 
 | |
|             if ((gotIt && eqOp == COMPARE_EQ) || (!gotIt && eqOp == COMPARE_NE))
 | |
|                 goto store;
 | |
| 
 | |
|             goto no_store;
 | |
|         }
 | |
| 
 | |
|         for (filterIndex = 0; filterIndex < in->NOPS; filterIndex++)
 | |
|         {
 | |
|             filter = reinterpret_cast<const DictFilterElement*>(&in8[filterOffset]);
 | |
|             string filt_utf8;
 | |
|             size_t filt_utf8_len = 0;
 | |
| 
 | |
|             if (utf8)
 | |
|             {
 | |
|                 string tmpString((const char*)filter->data, filter->len);
 | |
|                 filt_utf8 = tmpString;
 | |
|                 filt_utf8_len = filt_utf8.length();
 | |
|             }
 | |
| 
 | |
|             if (filter->COP & COMPARE_LIKE)
 | |
|             {
 | |
|                 cmpResult = isLike(&sigptr, &parsedLikeFilter[filterIndex]);
 | |
| 
 | |
|                 if (filter->COP & COMPARE_NOT)
 | |
|                     cmpResult = !cmpResult;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 if (utf8)
 | |
|                 {
 | |
|                     size_t sig_utf8_len = sig_utf8.length();
 | |
|                     tmp = utf8::idb_strcoll(sig_utf8.c_str(), filt_utf8.c_str());
 | |
|                     cmpResult = compare(tmp, filter->COP, sig_utf8_len, filt_utf8_len);
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     tmp = strncmp((const char*) sigptr.data, (const char*)filter->data,
 | |
|                                   std::min(sigptr.len, static_cast<int>(filter->len)));
 | |
|                 }
 | |
| 
 | |
|                 cmpResult = compare(tmp, filter->COP, sigptr.len, filter->len);
 | |
|             }
 | |
| 
 | |
|             if (!cmpResult && in->BOP != BOP_OR)
 | |
|                 goto no_store;
 | |
| 
 | |
|             if (cmpResult && in->BOP != BOP_AND)
 | |
|                 goto store;
 | |
| 
 | |
|             filterOffset += sizeof(DictFilterElement) + filter->len;
 | |
|         }
 | |
| 
 | |
|         if (filterIndex == in->NOPS && in->BOP != BOP_OR)
 | |
|         {
 | |
| store:
 | |
|             //cout << "storing it, str = " << string((char *)sigptr.data, sigptr.len) << endl;
 | |
|             header.NVALS++;
 | |
| 
 | |
|             if (in->OutputType & OT_RID && in->InputFlags == 1)  			// hack that indicates old GetSignature behavior
 | |
|             {
 | |
|                 const OldGetSigParams* oldParams;
 | |
|                 uint64_t* outRid;
 | |
|                 oldParams = reinterpret_cast<const OldGetSigParams*>(in->tokens);
 | |
|                 uint32_t newlen = header.NBYTES + 8;
 | |
| 
 | |
|                 if ( newlen > out->size() )
 | |
|                 {
 | |
|                     out->resize( out->size() * SCALE_FACTOR );
 | |
|                 }
 | |
| 
 | |
|                 outRid = (uint64_t*) & (*out)[header.NBYTES];
 | |
|                 // mask off the upper bit of the rid; signifies the NULL token was passed in
 | |
|                 *outRid = (oldParams[dict_OffsetIndex - 1].rid & 0x7fffffffffffffffLL);
 | |
|                 header.NBYTES += 8;
 | |
|             }
 | |
| 
 | |
|             if (in->OutputType & OT_INPUTARG && in->InputFlags == 0)
 | |
|             {
 | |
|                 uint32_t newlen = header.NBYTES + sizeof(DataValue) + filter->len;
 | |
| 
 | |
|                 if ( newlen > out->size() )
 | |
|                 {
 | |
|                     out->resize( out->size() * SCALE_FACTOR );
 | |
|                 }
 | |
| 
 | |
|                 outValue = reinterpret_cast<DataValue*>(&(*out)[header.NBYTES]);
 | |
|                 outValue->len = filter->len;
 | |
|                 memcpy(outValue->data, filter->data, filter->len);
 | |
|                 header.NBYTES += sizeof(DataValue) + filter->len;
 | |
|             }
 | |
| 
 | |
|             if (in->OutputType & OT_TOKEN)
 | |
|             {
 | |
|                 uint32_t newlen = header.NBYTES + sizeof(PrimToken);
 | |
| 
 | |
|                 if ( newlen > out->size() )
 | |
|                 {
 | |
|                     out->resize( out->size() * SCALE_FACTOR );
 | |
|                 }
 | |
| 
 | |
|                 outToken = reinterpret_cast<PrimToken*>(&(*out)[header.NBYTES]);
 | |
|                 outToken->LBID = in->LBID;
 | |
|                 outToken->offset = currentOffsetIndex;
 | |
|                 outToken->len = filter->len;
 | |
|                 header.NBYTES += sizeof(PrimToken);
 | |
|             }
 | |
| 
 | |
|             if (in->OutputType & OT_DATAVALUE)
 | |
|             {
 | |
|                 uint32_t newlen = header.NBYTES + sizeof(DataValue) + sigptr.len;
 | |
| 
 | |
|                 if ( newlen > out->size() )
 | |
|                 {
 | |
|                     out->resize( out->size() * SCALE_FACTOR );
 | |
|                 }
 | |
| 
 | |
|                 outValue = reinterpret_cast<DataValue*>(&(*out)[header.NBYTES]);
 | |
|                 outValue->len = sigptr.len;
 | |
|                 memcpy(outValue->data, sigptr.data, sigptr.len);
 | |
|                 header.NBYTES += sizeof(DataValue) + sigptr.len;
 | |
|             }
 | |
|         }
 | |
| 
 | |
| no_store: ;  // intentional
 | |
|     }
 | |
| 
 | |
|     if (in->OutputType & OT_AGGREGATE)
 | |
|     {
 | |
|         uint32_t newlen = header.NBYTES + 3 * sizeof(uint16_t) + min.len + max.len;
 | |
| 
 | |
|         if ( newlen > out->size() )
 | |
|         {
 | |
|             out->resize( out->size() * SCALE_FACTOR );
 | |
|         }
 | |
| 
 | |
|         uint16_t* tmp16 = reinterpret_cast<uint16_t*>(&(*out)[header.NBYTES]);
 | |
|         DataValue* tmpDV = reinterpret_cast<DataValue*>(&(*out)[header.NBYTES + sizeof(uint16_t)]);
 | |
| 
 | |
|         *tmp16 = aggCount;
 | |
|         tmpDV->len = min.len;
 | |
|         memcpy(tmpDV->data, min.data, min.len);
 | |
|         header.NBYTES += 2 * sizeof(uint16_t) + min.len;
 | |
| 
 | |
|         tmpDV = reinterpret_cast<DataValue*>(&(*out)[header.NBYTES]);
 | |
|         tmpDV->len = max.len;
 | |
|         memcpy(tmpDV->data, max.data, max.len);
 | |
|         header.NBYTES += sizeof(uint16_t) + max.len;
 | |
|     }
 | |
| 
 | |
|     out->resize( header.NBYTES );
 | |
| 
 | |
|     memcpy(&(*out)[0], &header, sizeof(DictOutput));
 | |
| }
 | |
| 
 | |
| }
 | |
| // vim:ts=4 sw=4:
 | |
| 
 |