You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-01 06:46:55 +03:00
the beginning
This commit is contained in:
891
primitives/linux-port/dictionary.cpp
Normal file
891
primitives/linux-port/dictionary.cpp
Normal file
@ -0,0 +1,891 @@
|
||||
/* Copyright (C) 2014 InfiniDB, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; version 2 of
|
||||
the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
/*
|
||||
* $Id: dictionary.cpp 2122 2013-07-08 16:33:50Z bpaul $
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <boost/scoped_array.hpp>
|
||||
#include <sys/types.h>
|
||||
using namespace std;
|
||||
|
||||
#include "primitiveprocessor.h"
|
||||
#include "we_type.h"
|
||||
#include "messagelog.h"
|
||||
#include "messageobj.h"
|
||||
#include "exceptclasses.h"
|
||||
#include "utils_utf8.h"
|
||||
#include <sstream>
|
||||
|
||||
using namespace funcexp;
|
||||
using namespace logging;
|
||||
|
||||
// Placeholder payload that nextSig() hands back for a NULL token.  It is a
// real one-character string rather than a NULL pointer so that consumers which
// touch the data pointer do not fault; the reported length is nullStringLen.
const char *nullString = " "; // this is not NULL to preempt segfaults.
|
||||
// Length reported together with nullString for a NULL token: zero, i.e. none
// of the placeholder's bytes are presented as signature data.
const int nullStringLen = 0;
|
||||
|
||||
namespace
|
||||
{
|
||||
// Marker string returned by nextSig() in place of a signature whose computed
// length is implausible (negative or > 8001), so the query can continue while
// the problem stays recognisable in the output.
// NOTE(review): this caches c_str() of a string defined in another translation
// unit during static initialisation -- confirm initialisation order is safe.
const char* signatureNotFound = joblist::CPSTRNOTFOUND.c_str();
|
||||
}
|
||||
|
||||
namespace primitives
|
||||
{
|
||||
|
||||
inline bool PrimitiveProcessor::compare(int cmp, uint8_t COP, int len1, int len2) throw()
|
||||
{
|
||||
|
||||
switch(COP) {
|
||||
case COMPARE_NIL:
|
||||
return false;
|
||||
case COMPARE_LT:
|
||||
return (cmp < 0 || (cmp == 0 && len1 < len2));
|
||||
case COMPARE_EQ:
|
||||
return (cmp == 0 && len1 == len2 ? true : false);
|
||||
case COMPARE_LE:
|
||||
return (cmp < 0 || (cmp == 0 && len1 <= len2));
|
||||
case COMPARE_GT:
|
||||
return (cmp > 0 || (cmp == 0 && len1 > len2));
|
||||
case COMPARE_NE:
|
||||
return (cmp != 0 || len1 != len2 ? true : false);
|
||||
case COMPARE_GE:
|
||||
return (cmp > 0 || (cmp == 0 && len1 >= len2));
|
||||
case COMPARE_LIKE:
|
||||
return cmp; // is done elsewhere; shouldn't get here. Exception?
|
||||
case COMPARE_NOT:
|
||||
return false; // throw an exception here?
|
||||
default:
|
||||
MessageLog logger(LoggingID(28));
|
||||
logging::Message::Args colWidth;
|
||||
Message msg(34);
|
||||
|
||||
colWidth.add(COP);
|
||||
colWidth.add("compare");
|
||||
msg.format(colWidth);
|
||||
logger.logErrorMessage(msg);
|
||||
return false; // throw an exception here?
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Notes:
|
||||
- assumes no continuation pointer
|
||||
*/
|
||||
|
||||
void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader *h,
|
||||
TokenByScanResultHeader *ret, unsigned outSize, bool utf8,
|
||||
boost::shared_ptr<DictEqualityFilter> eqFilter)
|
||||
{
|
||||
const DataValue *args;
|
||||
const uint8_t *niceBlock; // block cast to a byte-indexed type
|
||||
const uint8_t *niceInput; // h cast to a byte-indexed type
|
||||
const uint16_t *offsets;
|
||||
int offsetIndex, argIndex, argsOffset;
|
||||
bool cmpResult=false;
|
||||
int tmp, i, err;
|
||||
|
||||
const char *sig;
|
||||
uint16_t siglen;
|
||||
|
||||
PrimToken *retTokens;
|
||||
DataValue *retDataValues;
|
||||
int rdvOffset;
|
||||
uint8_t *niceRet; // ret cast to a byte-indexed type
|
||||
|
||||
boost::scoped_array<idb_regex_t> regex;
|
||||
|
||||
// set up pointers to fields within each structure
|
||||
|
||||
// either retTokens or retDataValues will be used but not both.
|
||||
niceRet = reinterpret_cast<uint8_t *>(ret);
|
||||
rdvOffset = sizeof(TokenByScanResultHeader);
|
||||
|
||||
retTokens = reinterpret_cast<PrimToken *>(&niceRet[rdvOffset]);
|
||||
retDataValues = reinterpret_cast<DataValue *>(&niceRet[rdvOffset]);
|
||||
memcpy(ret, h, sizeof(PrimitiveHeader) + sizeof(ISMPacketHeader));
|
||||
ret->NVALS = 0;
|
||||
ret->NBYTES = sizeof(TokenByScanResultHeader);
|
||||
ret->ism.Command = DICT_SCAN_COMPARE_RESULTS;
|
||||
|
||||
//...Initialize I/O counts
|
||||
ret->CacheIO = 0;
|
||||
ret->PhysicalIO = 0;
|
||||
|
||||
niceBlock = reinterpret_cast<const uint8_t *>(block);
|
||||
offsets = reinterpret_cast<const uint16_t *>(&niceBlock[10]);
|
||||
niceInput = reinterpret_cast<const uint8_t *>(h);
|
||||
|
||||
// if LIKE is an operator, compile regexp's in advance.
|
||||
if ((h->NVALS > 0 && h->COP1 & COMPARE_LIKE) ||
|
||||
(h->NVALS == 2 && h->COP2 & COMPARE_LIKE)) {
|
||||
regex.reset(new idb_regex_t[h->NVALS]);
|
||||
for (i = 0, argsOffset = sizeof(TokenByScanRequestHeader); i < h->NVALS; i++) {
|
||||
p_DataValue pdvTmp;
|
||||
|
||||
args = reinterpret_cast<const DataValue *>(&niceInput[argsOffset]);
|
||||
pdvTmp.len = args->len;
|
||||
pdvTmp.data = (const uint8_t *) args->data;
|
||||
err = convertToRegexp(®ex[i], &pdvTmp);
|
||||
if (err != 0) {
|
||||
MessageLog logger(LoggingID(28));
|
||||
Message msg(37);
|
||||
logger.logErrorMessage(msg);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
argsOffset += sizeof(uint16_t) + args->len;
|
||||
}
|
||||
}
|
||||
|
||||
for (offsetIndex = 1; offsets[offsetIndex] != 0xffff; offsetIndex++) {
|
||||
|
||||
siglen = offsets[offsetIndex-1] - offsets[offsetIndex];
|
||||
sig = reinterpret_cast<const char *>(&niceBlock[offsets[offsetIndex]]);
|
||||
argsOffset = sizeof(TokenByScanRequestHeader);
|
||||
argIndex = 0;
|
||||
args = reinterpret_cast<const DataValue *>(&niceInput[argsOffset]);
|
||||
|
||||
string sig_utf8;
|
||||
string arg_utf8;
|
||||
|
||||
if (eqFilter) {
|
||||
bool gotIt = eqFilter->find(string(sig, siglen)) != eqFilter->end();
|
||||
if ((h->COP1 == COMPARE_EQ && gotIt) || (h->COP1 == COMPARE_NE &&
|
||||
!gotIt))
|
||||
goto store;
|
||||
goto no_store;
|
||||
}
|
||||
|
||||
// BUG 5110: If it is utf, we need to create utf strings to compare
|
||||
if(utf8)
|
||||
{
|
||||
sig_utf8 = string(sig, siglen);
|
||||
arg_utf8 = string(args->data, args->len);
|
||||
}
|
||||
switch (h->NVALS) {
|
||||
case 1: {
|
||||
if (h->COP1 & COMPARE_LIKE) {
|
||||
p_DataValue dv;
|
||||
|
||||
dv.len = siglen;
|
||||
dv.data = (uint8_t *) sig;
|
||||
cmpResult = isLike(&dv, ®ex[argIndex]);
|
||||
if (h->COP1 & COMPARE_NOT)
|
||||
cmpResult = !cmpResult;
|
||||
}
|
||||
else {
|
||||
if (utf8) {
|
||||
tmp = utf8::idb_strcoll(sig_utf8.c_str(), arg_utf8.c_str());
|
||||
cmpResult = compare(tmp, h->COP1, siglen, args->len);
|
||||
} else {
|
||||
tmp = strncmp(sig, args->data, std::min(siglen, args->len));
|
||||
cmpResult = compare(tmp, h->COP1, siglen, args->len);
|
||||
}
|
||||
}
|
||||
if (cmpResult)
|
||||
goto store;
|
||||
goto no_store;
|
||||
}
|
||||
case 2: {
|
||||
if (h->COP1 & COMPARE_LIKE) {
|
||||
p_DataValue dv;
|
||||
|
||||
dv.len = siglen;
|
||||
dv.data = (uint8_t *) sig;
|
||||
cmpResult = isLike(&dv, ®ex[argIndex]);
|
||||
if (h->COP1 & COMPARE_NOT)
|
||||
cmpResult = !cmpResult;
|
||||
}
|
||||
|
||||
else {
|
||||
if (utf8) {
|
||||
tmp = utf8::idb_strcoll(sig_utf8.c_str(), arg_utf8.c_str());
|
||||
cmpResult = compare(tmp, h->COP1, siglen, args->len);
|
||||
} else {
|
||||
tmp = strncmp(sig, args->data, std::min(siglen, args->len));
|
||||
cmpResult = compare(tmp, h->COP1, siglen, args->len);
|
||||
}
|
||||
}
|
||||
|
||||
if (!cmpResult && h->BOP == BOP_AND)
|
||||
goto no_store;
|
||||
if (cmpResult && h->BOP == BOP_OR)
|
||||
goto store;
|
||||
|
||||
argsOffset += sizeof(uint16_t) + args->len;
|
||||
argIndex++;
|
||||
args = (DataValue *) &niceInput[argsOffset];
|
||||
if (h->COP2 & COMPARE_LIKE) {
|
||||
p_DataValue dv;
|
||||
|
||||
dv.len = siglen;
|
||||
dv.data = (uint8_t *) sig;
|
||||
cmpResult = isLike(&dv, ®ex[argIndex]);
|
||||
if (h->COP2 & COMPARE_NOT)
|
||||
cmpResult = !cmpResult;
|
||||
}
|
||||
|
||||
else {
|
||||
if (utf8) {
|
||||
arg_utf8 = string(args->data, args->len);
|
||||
tmp = utf8::idb_strcoll(sig_utf8.c_str(), arg_utf8.c_str());
|
||||
cmpResult = compare(tmp, h->COP2, siglen, args->len);
|
||||
} else {
|
||||
tmp = strncmp(sig, args->data, std::min(siglen, args->len));
|
||||
cmpResult = compare(tmp, h->COP2, siglen, args->len);
|
||||
}
|
||||
}
|
||||
|
||||
if (cmpResult)
|
||||
goto store;
|
||||
goto no_store;
|
||||
}
|
||||
default: {
|
||||
for (i = 0, cmpResult = true; i < h->NVALS; i++) {
|
||||
if (h->COP1 & COMPARE_LIKE) {
|
||||
p_DataValue dv;
|
||||
|
||||
dv.len = siglen;
|
||||
dv.data = (uint8_t *) sig;
|
||||
cmpResult = isLike(&dv, ®ex[argIndex]);
|
||||
if (h->COP1 & COMPARE_NOT)
|
||||
cmpResult = !cmpResult;
|
||||
}
|
||||
|
||||
else {
|
||||
if (utf8) {
|
||||
tmp = utf8::idb_strcoll(sig_utf8.c_str(), arg_utf8.c_str());
|
||||
cmpResult = compare(tmp, h->COP2, siglen, args->len);
|
||||
} else {
|
||||
tmp = strncmp(sig, args->data, std::min(siglen, args->len));
|
||||
cmpResult = compare(tmp, h->COP1, siglen, args->len);
|
||||
}
|
||||
}
|
||||
|
||||
if (!cmpResult && h->BOP == BOP_AND)
|
||||
goto no_store;
|
||||
if (cmpResult && h->BOP == BOP_OR)
|
||||
goto store;
|
||||
|
||||
argsOffset += sizeof(uint16_t) + args->len;
|
||||
argIndex++;
|
||||
args = (DataValue *) &niceInput[argsOffset];
|
||||
if ( utf8) {
|
||||
arg_utf8 = string(args->data, args->len);
|
||||
}
|
||||
}
|
||||
if (i == h->NVALS && cmpResult)
|
||||
goto store;
|
||||
else
|
||||
goto no_store;
|
||||
}
|
||||
}
|
||||
|
||||
store:
|
||||
if (h->OutputType == OT_DATAVALUE) {
|
||||
if ((ret->NBYTES + sizeof(DataValue) + siglen) > outSize) {
|
||||
MessageLog logger(LoggingID(28));
|
||||
logging::Message::Args marker;
|
||||
Message msg(35);
|
||||
|
||||
marker.add(8);
|
||||
msg.format(marker);
|
||||
logger.logErrorMessage(msg);
|
||||
|
||||
throw logging::DictionaryBufferOverflow();
|
||||
}
|
||||
|
||||
retDataValues->len = siglen;
|
||||
memcpy(retDataValues->data, sig, siglen);
|
||||
rdvOffset += sizeof(DataValue) + siglen;
|
||||
retDataValues = (DataValue *) &niceRet[rdvOffset];
|
||||
ret->NVALS++;
|
||||
ret->NBYTES += sizeof(DataValue) + siglen;
|
||||
}
|
||||
else if (h->OutputType == OT_TOKEN) {
|
||||
if ((ret->NBYTES + sizeof(PrimToken)) > outSize) {
|
||||
MessageLog logger(LoggingID(28));
|
||||
logging::Message::Args marker;
|
||||
Message msg(35);
|
||||
|
||||
marker.add(9);
|
||||
msg.format(marker);
|
||||
logger.logErrorMessage(msg);
|
||||
|
||||
throw logging::DictionaryBufferOverflow();
|
||||
}
|
||||
|
||||
retTokens[ret->NVALS].LBID = h->LBID;
|
||||
retTokens[ret->NVALS].offset = offsetIndex; // need index rather than the block offset... rp 12/19/06
|
||||
retTokens[ret->NVALS].len = args->len;
|
||||
ret->NVALS++;
|
||||
ret->NBYTES += sizeof(PrimToken);
|
||||
}
|
||||
/*
|
||||
* XXXPAT: HACK! Ron requested a special case where the input string
|
||||
* that matched and the token of the matched string were returned.
|
||||
* It will not be used in cases where there are multiple input strings.
|
||||
* We need to rethink the requirements for this primitive after Dec 15.
|
||||
*/
|
||||
else if (h->OutputType == OT_BOTH) {
|
||||
if (ret->NBYTES + sizeof(PrimToken) + sizeof(DataValue) + args->len > outSize) {
|
||||
MessageLog logger(LoggingID(28));
|
||||
logging::Message::Args marker;
|
||||
Message msg(35);
|
||||
|
||||
marker.add(10);
|
||||
msg.format(marker);
|
||||
logger.logErrorMessage(msg);
|
||||
|
||||
throw logging::DictionaryBufferOverflow();
|
||||
}
|
||||
|
||||
retDataValues->len = args->len;
|
||||
memcpy(retDataValues->data, args->data, args->len);
|
||||
rdvOffset += sizeof(DataValue) + args->len;
|
||||
retTokens = reinterpret_cast<PrimToken *>(&niceRet[rdvOffset]);
|
||||
retTokens->LBID = h->LBID;
|
||||
retTokens->offset = offsetIndex; // need index rather than the block offset... rp 12/19/06
|
||||
retTokens->len = args->len;
|
||||
rdvOffset += sizeof(PrimToken);
|
||||
retDataValues = reinterpret_cast<DataValue *>(&niceRet[rdvOffset]);
|
||||
ret->NBYTES += sizeof(PrimToken) + sizeof(DataValue) + args->len;
|
||||
ret->NVALS++;
|
||||
}
|
||||
no_store:
|
||||
; //this is intentional
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void PrimitiveProcessor::nextSig(int NVALS,
|
||||
const PrimToken *tokens,
|
||||
p_DataValue *ret,
|
||||
uint8_t outputFlags,
|
||||
bool oldGetSigBehavior, bool skipNulls) throw()
|
||||
{
|
||||
const uint8_t* niceBlock = reinterpret_cast<const uint8_t *>(block);
|
||||
const uint16_t *offsets
|
||||
= reinterpret_cast<const uint16_t *>(&niceBlock[10]);
|
||||
|
||||
if (NVALS == 0) {
|
||||
if (offsets[dict_OffsetIndex + 1] == 0xffff) {
|
||||
ret->len = -1;
|
||||
return;
|
||||
}
|
||||
ret->len = offsets[dict_OffsetIndex] - offsets[dict_OffsetIndex + 1];
|
||||
ret->data = &niceBlock[offsets[dict_OffsetIndex + 1]];
|
||||
if (outputFlags & OT_TOKEN)
|
||||
currentOffsetIndex = dict_OffsetIndex + 1;
|
||||
} else {
|
||||
|
||||
again:
|
||||
if (dict_OffsetIndex >= NVALS) {
|
||||
ret->len = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
if (oldGetSigBehavior) {
|
||||
|
||||
const OldGetSigParams *oldParams=
|
||||
reinterpret_cast<const OldGetSigParams *>(tokens);
|
||||
if (oldParams[dict_OffsetIndex].rid & 0x8000000000000000LL) {
|
||||
if (skipNulls) {
|
||||
/* Bug 3321. For some cases the NULL token should be skipped. The
|
||||
* isnull filter is handled by token columncommand or by the F & E
|
||||
* framework. This primitive should only process nulls
|
||||
* when it's for projection.
|
||||
*/
|
||||
dict_OffsetIndex++;
|
||||
goto again;
|
||||
}
|
||||
ret->len = nullStringLen;
|
||||
ret->data = (const uint8_t *) nullString;
|
||||
}
|
||||
else {
|
||||
ret->len = offsets[oldParams[dict_OffsetIndex].offsetIndex - 1] -
|
||||
offsets[oldParams[dict_OffsetIndex].offsetIndex];
|
||||
//Whoa! apparently we have come across a missing signature! That is, the requested ordinal
|
||||
// is larger than the number of signatures in this block. Return a "special" string so that
|
||||
// the query keeps going, but that can be recognized as an internal error upon inspection.
|
||||
//@Bug 2534. Change the length check to 8000
|
||||
if (ret->len < 0 || ret->len > 8001)
|
||||
{
|
||||
ret->data = reinterpret_cast<const uint8_t*>(signatureNotFound);
|
||||
ret->len = strlen(reinterpret_cast<const char*>(ret->data));
|
||||
}
|
||||
else
|
||||
ret->data = &niceBlock[offsets[oldParams[dict_OffsetIndex].offsetIndex]];
|
||||
}
|
||||
// idbassert(ret->len >= 0);
|
||||
currentOffsetIndex = oldParams[dict_OffsetIndex].offsetIndex;
|
||||
dict_OffsetIndex++;
|
||||
return;
|
||||
}
|
||||
|
||||
/* XXXPAT: Need to check for the NULL token here */
|
||||
ret->len = tokens[dict_OffsetIndex].len;
|
||||
ret->data = &niceBlock[tokens[dict_OffsetIndex].offset];
|
||||
|
||||
if (outputFlags & OT_TOKEN) {
|
||||
//offsets = reinterpret_cast<const uint16_t *>(&niceBlock[10]);
|
||||
for (currentOffsetIndex = 1; offsets[currentOffsetIndex] != 0xffff; currentOffsetIndex++)
|
||||
if (tokens[dict_OffsetIndex].offset == offsets[currentOffsetIndex])
|
||||
break;
|
||||
|
||||
if (offsets[currentOffsetIndex] == 0xffff) {
|
||||
MessageLog logger(LoggingID(28));
|
||||
logging::Message::Args offset;
|
||||
Message msg(38);
|
||||
|
||||
offset.add(tokens[dict_OffsetIndex].offset);
|
||||
msg.format(offset);
|
||||
logger.logErrorMessage(msg);
|
||||
|
||||
currentOffsetIndex = -1;
|
||||
dict_OffsetIndex++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
dict_OffsetIndex++;
|
||||
}
|
||||
|
||||
void PrimitiveProcessor::p_AggregateSignature(const AggregateSignatureRequestHeader *in,
|
||||
AggregateSignatureResultHeader *out, unsigned outSize, unsigned *written, bool utf8)
|
||||
{
|
||||
|
||||
uint8_t *niceOutput; // h cast to a byte-indexed type
|
||||
int cmp;
|
||||
char cMin[BLOCK_SIZE], cMax[BLOCK_SIZE];
|
||||
int cMinLen, cMaxLen;
|
||||
p_DataValue sigptr;
|
||||
|
||||
DataValue *min;
|
||||
DataValue *max;
|
||||
|
||||
memcpy(out, in, sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader));
|
||||
out->ism.Command = DICT_AGGREGATE_RESULTS;
|
||||
niceOutput = reinterpret_cast<uint8_t *>(out);
|
||||
|
||||
// The first sig is the min and the max.
|
||||
out->Count = 0;
|
||||
dict_OffsetIndex = 0;
|
||||
nextSig(in->NVALS, in->tokens, &sigptr);
|
||||
if (sigptr.len == -1)
|
||||
return;
|
||||
out->Count++;
|
||||
memcpy(cMin, sigptr.data, sigptr.len);
|
||||
memcpy(cMax, sigptr.data, sigptr.len);
|
||||
cMinLen = cMaxLen = sigptr.len;
|
||||
|
||||
for (nextSig(in->NVALS, in->tokens, &sigptr); sigptr.len != -1;
|
||||
nextSig(in->NVALS, in->tokens, &sigptr), out->Count++) {
|
||||
string sig_utf8;
|
||||
if (utf8) {
|
||||
string cMin_utf8(cMin, cMinLen);
|
||||
string tmpString((char*)sigptr.data, sigptr.len);
|
||||
sig_utf8 = tmpString;
|
||||
cmp = utf8::idb_strcoll(cMin_utf8.c_str(), sig_utf8.c_str());
|
||||
} else {
|
||||
cmp = strncmp(cMin, (char*)sigptr.data, std::min(cMinLen, sigptr.len));
|
||||
}
|
||||
if (cmp > 0) {
|
||||
memcpy(cMin, sigptr.data, sigptr.len);
|
||||
cMinLen = sigptr.len;
|
||||
}
|
||||
if (utf8) {
|
||||
string cMax_utf8(cMax, cMaxLen);
|
||||
cmp = utf8::idb_strcoll(cMax_utf8.c_str(), sig_utf8.c_str());
|
||||
} else {
|
||||
cmp = strncmp(cMax, (char*)sigptr.data, std::min(cMaxLen, sigptr.len));
|
||||
}
|
||||
if (cmp < 0) {
|
||||
memcpy(cMax, sigptr.data, sigptr.len);
|
||||
cMaxLen = sigptr.len;
|
||||
}
|
||||
}
|
||||
|
||||
//we now have the results, stuff them into the output buffer
|
||||
#ifdef PRIM_DEBUG
|
||||
unsigned size = sizeof(AggregateSignatureResultHeader) + cMaxLen + cMinLen
|
||||
+ sizeof(uint16_t) * 2;
|
||||
if (outSize < size) {
|
||||
MessageLog logger(LoggingID(28));
|
||||
logging::Message::Args marker;
|
||||
Message msg(35);
|
||||
|
||||
marker.add(11);
|
||||
msg.format(marker);
|
||||
logger.logErrorMessage(msg);
|
||||
|
||||
throw length_error("PrimitiveProcessor::p_AggregateSignature(): output buffer is too small");
|
||||
}
|
||||
#endif
|
||||
|
||||
min = reinterpret_cast<DataValue *>
|
||||
(&niceOutput[sizeof(AggregateSignatureResultHeader)]);
|
||||
max = reinterpret_cast<DataValue *>
|
||||
(&niceOutput[sizeof(AggregateSignatureResultHeader) + cMinLen + sizeof(uint16_t)]);
|
||||
min->len = cMinLen;
|
||||
max->len = cMaxLen;
|
||||
memcpy(min->data, cMin, cMinLen);
|
||||
memcpy(max->data, cMax, cMaxLen);
|
||||
*written = sizeof(AggregateSignatureResultHeader) + cMaxLen + cMinLen
|
||||
+ sizeof(uint16_t) * 2;
|
||||
}
|
||||
|
||||
// The SQL LIKE escape character; convertToRegexp() also emits it as the escape
// character in the regular expressions it generates.
const char backslash = '\\';
|
||||
|
||||
/*
 * Tell whether `c` is one of the two LIKE wildcards ('%' or '_'), i.e. a
 * character that a preceding backslash turns into a literal.
 */
inline bool PrimitiveProcessor::isEscapedChar(char c)
{
    switch (c) {
        case '%':
        case '_':
            return true;

        default:
            return false;
    }
}
|
||||
|
||||
//FIXME: copy/pasted to dataconvert.h: refactor
|
||||
//FIXME: copy/pasted to dataconvert.h: refactor
/*
 * Translate a SQL LIKE pattern into an anchored regular expression and
 * compile it into *regex.
 *
 * regex  receives the compiled expression; `used` is set on success
 * str    the LIKE pattern (need not be NUL-terminated)
 *
 * Returns 0 on success.  In POSIX_REGEX builds a regcomp() failure is now
 * reported through its non-zero error code and regex->used is left untouched
 * (previously the result was ignored and 0 was always returned, although
 * p_TokenByScan checks the return value).
 *
 * Translation rules:
 *   %            ->  .*
 *   _            ->  .
 *   \% and \_    ->  the literal character
 *   \\ and a lone backslash -> an escaped (literal) backslash
 *   . * ^ $ ? + | [ ] { } ( )  ->  the character prefixed with a backslash
 *   anything else is copied unchanged
 */
int PrimitiveProcessor::convertToRegexp(idb_regex_t *regex, const p_DataValue *str)
{
    // Every input byte expands to at most two output bytes; the generous 4x
    // sizing is kept from the original and leaves room for '^', '$' and NUL.
    char* cBuf = (char*)alloca(((4 * str->len) + 3) * sizeof(char));
    int cBufIdx = 0;

    // translate to regexp symbols
    cBuf[cBufIdx++] = '^';  // implicit leading anchor
    for (int i = 0; i < str->len; i++) {
        const char c = (char) str->data[i];
        switch (c) {

            // chars to substitute
            case '%':
                cBuf[cBufIdx++] = '.';
                cBuf[cBufIdx++] = '*';
                break;
            case '_':
                cBuf[cBufIdx++] = '.';
                break;

            // escape the chars that are special in regexp's but not in SQL
            // default special characters in perl: .[{}()\*+?|^$
            case '.':
            case '*':
            case '^':
            case '$':
            case '?':
            case '+':
            case '|':
            case '[':
            case ']':
            case '{':
            case '}':
            case '(':
            case ')':
                cBuf[cBufIdx++] = backslash;
                cBuf[cBufIdx++] = c;
                break;

            case backslash:  //this is the sql escape char
                if (i + 1 < str->len && isEscapedChar(str->data[i+1])) {
                    // escaped wildcard: emit it as a literal
                    cBuf[cBufIdx++] = str->data[++i];
                }
                else {
                    // "\\" or a single slash: both become a literal backslash;
                    // for the pair, the second input character is consumed too.
                    cBuf[cBufIdx++] = backslash;
                    cBuf[cBufIdx++] = backslash;
                    if (i + 1 < str->len && backslash == str->data[i+1])
                        ++i;
                }
                break;

            default:
                cBuf[cBufIdx++] = c;
        }
    }
    cBuf[cBufIdx++] = '$';  // implicit trailing anchor
    cBuf[cBufIdx++] = '\0';

#ifdef VERBOSE
    cerr << "regexified string is " << cBuf << endl;
#endif

#ifdef POSIX_REGEX
    const int rc = regcomp(&regex->regex, cBuf, REG_NOSUB | REG_EXTENDED);
    if (rc != 0)
        return rc;  // presumably `used` gates regfree() -- leave it unset on failure
#else
    regex->regex = cBuf;
#endif
    regex->used = true;
    return 0;
}
|
||||
|
||||
bool PrimitiveProcessor::isLike(const p_DataValue *dict, const idb_regex_t *regex) throw()
|
||||
{
|
||||
#ifdef POSIX_REGEX
|
||||
char cBuf[dict->len + 1];
|
||||
memcpy(cBuf, dict->data, dict->len);
|
||||
cBuf[dict->len] = '\0';
|
||||
|
||||
return (regexec(®ex->regex, cBuf, 0, NULL, 0) == 0);
|
||||
#else
|
||||
/* Note, the passed-in pointers are effectively begin() and end() iterators */
|
||||
return regex_match(dict->data, dict->data + dict->len, regex->regex);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Pre-compile the LIKE patterns found in a packed sequence of `count`
 * DictFilterElements (each element is followed directly by its `len` bytes of
 * pattern data).
 *
 * Returns an array of `count` entries in which entry i holds the compiled
 * pattern of filter i if that filter has COMPARE_LIKE set.  Entries of
 * non-LIKE filters are left default-constructed.  The returned pointer is
 * empty when no filter uses LIKE.
 */
boost::shared_array<idb_regex_t>
PrimitiveProcessor::makeLikeFilter (const DictFilterElement *filterString, uint32_t count)
{
    boost::shared_array<idb_regex_t> compiled;
    const uint8_t *cursor = reinterpret_cast<const uint8_t *>(filterString);

    for (uint32_t idx = 0; idx < count; ++idx) {
        const DictFilterElement *element =
            reinterpret_cast<const DictFilterElement *>(cursor);

        if (element->COP & COMPARE_LIKE) {
            // Allocate lazily: most filter lists contain no LIKE at all.
            if (!compiled)
                compiled.reset(new idb_regex_t[count]);

            p_DataValue pattern = {0, NULL};
            pattern.len = element->len;
            pattern.data = element->data;
            convertToRegexp(&compiled[idx], &pattern);
        }

        // step over this element's header and its inline pattern bytes
        cursor += sizeof(DictFilterElement) + element->len;
    }

    return compiled;
}
|
||||
|
||||
void PrimitiveProcessor::p_Dictionary(const DictInput *in, vector<uint8_t> *out, bool utf8,
|
||||
bool skipNulls, boost::shared_ptr<DictEqualityFilter> eqFilter, uint8_t eqOp)
|
||||
{
|
||||
PrimToken *outToken;
|
||||
const DictFilterElement *filter=0;
|
||||
const uint8_t* in8;
|
||||
DataValue *outValue;
|
||||
p_DataValue min={0, NULL}, max={0, NULL}, sigptr={0, NULL};
|
||||
int tmp, filterIndex, filterOffset;
|
||||
uint16_t aggCount;
|
||||
bool cmpResult;
|
||||
DictOutput header;
|
||||
|
||||
// default size of the ouput to something sufficiently large to prevent
|
||||
// excessive reallocation and copy when resizing
|
||||
const unsigned DEF_OUTSIZE = 16*1024;
|
||||
// use this factor to scale out size of future resize calls
|
||||
const int SCALE_FACTOR = 2;
|
||||
out->resize(DEF_OUTSIZE);
|
||||
|
||||
in8 = reinterpret_cast<const uint8_t *>(in);
|
||||
|
||||
memcpy(&header, in, sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader));
|
||||
header.ism.Command = DICT_RESULTS;
|
||||
header.NVALS = 0;
|
||||
header.LBID = in->LBID;
|
||||
dict_OffsetIndex = 0;
|
||||
filterIndex = 0;
|
||||
aggCount = 0;
|
||||
min.len = 0;
|
||||
max.len = 0;
|
||||
|
||||
//...Initialize I/O counts
|
||||
header.CacheIO = 0;
|
||||
header.PhysicalIO = 0;
|
||||
|
||||
header.NBYTES = sizeof(DictOutput);
|
||||
|
||||
for (nextSig(in->NVALS, in->tokens, &sigptr, in->OutputType,
|
||||
(in->InputFlags ? true : false), skipNulls);
|
||||
sigptr.len != -1;
|
||||
nextSig(in->NVALS, in->tokens, &sigptr, in->OutputType,
|
||||
(in->InputFlags ? true : false), skipNulls)) {
|
||||
|
||||
string sig_utf8;
|
||||
if (utf8) {
|
||||
string tmpString((char*)sigptr.data, sigptr.len);
|
||||
sig_utf8 = tmpString;
|
||||
}
|
||||
|
||||
// do aggregate processing
|
||||
if (in->OutputType & OT_AGGREGATE) {
|
||||
// len == 0 indicates this is the first pass
|
||||
if (max.len != 0) {
|
||||
if (utf8 ) {
|
||||
string max_utf8((char*)max.data, max.len);
|
||||
tmp = utf8::idb_strcoll(sig_utf8.c_str(), max_utf8.c_str());
|
||||
} else {
|
||||
tmp = strncmp((char *)sigptr.data, (char *)max.data, std::min(sigptr.len, max.len));
|
||||
}
|
||||
if (tmp > 0)
|
||||
max = sigptr;
|
||||
}
|
||||
else
|
||||
max = sigptr;
|
||||
|
||||
if (min.len != 0) {
|
||||
if (utf8) {
|
||||
string min_utf8((char*)min.data, min.len);
|
||||
tmp = utf8::idb_strcoll(sig_utf8.c_str(), min_utf8.c_str());
|
||||
} else {
|
||||
tmp = strncmp((char *)sigptr.data, (char *)min.data, std::min(sigptr.len, min.len));
|
||||
}
|
||||
if (tmp < 0)
|
||||
min = sigptr;
|
||||
}
|
||||
else
|
||||
min = sigptr;
|
||||
aggCount++;
|
||||
}
|
||||
|
||||
// filter processing
|
||||
if (in->InputFlags == 1)
|
||||
filterOffset = sizeof(DictInput) + (in->NVALS * sizeof(OldGetSigParams));
|
||||
else
|
||||
filterOffset = sizeof(DictInput) + (in->NVALS * sizeof(PrimToken));
|
||||
|
||||
if (eqFilter) {
|
||||
bool gotIt = (eqFilter->find(string((char *) sigptr.data, sigptr.len))
|
||||
!= eqFilter->end());
|
||||
if ((gotIt && eqOp == COMPARE_EQ) || (!gotIt && eqOp == COMPARE_NE))
|
||||
goto store;
|
||||
goto no_store;
|
||||
}
|
||||
|
||||
for (filterIndex = 0; filterIndex < in->NOPS; filterIndex++) {
|
||||
filter = reinterpret_cast<const DictFilterElement *>(&in8[filterOffset]);
|
||||
string filt_utf8;
|
||||
size_t filt_utf8_len=0;
|
||||
if (utf8) {
|
||||
string tmpString((const char *)filter->data, filter->len);
|
||||
filt_utf8 = tmpString;
|
||||
filt_utf8_len = filt_utf8.length();
|
||||
}
|
||||
|
||||
if (filter->COP & COMPARE_LIKE) {
|
||||
cmpResult = isLike(&sigptr, &parsedLikeFilter[filterIndex]);
|
||||
if (filter->COP & COMPARE_NOT)
|
||||
cmpResult = !cmpResult;
|
||||
}
|
||||
else {
|
||||
if (utf8) {
|
||||
size_t sig_utf8_len = sig_utf8.length();
|
||||
tmp = utf8::idb_strcoll(sig_utf8.c_str(), filt_utf8.c_str());
|
||||
cmpResult = compare(tmp, filter->COP, sig_utf8_len, filt_utf8_len);
|
||||
} else {
|
||||
tmp = strncmp((const char *) sigptr.data, (const char *)filter->data,
|
||||
std::min(sigptr.len, static_cast<int>(filter->len)));
|
||||
}
|
||||
cmpResult = compare(tmp, filter->COP, sigptr.len, filter->len);
|
||||
}
|
||||
|
||||
if (!cmpResult && in->BOP != BOP_OR)
|
||||
goto no_store;
|
||||
if (cmpResult && in->BOP != BOP_AND)
|
||||
goto store;
|
||||
filterOffset += sizeof(DictFilterElement) + filter->len;
|
||||
}
|
||||
if (filterIndex == in->NOPS && in->BOP != BOP_OR) {
|
||||
store:
|
||||
//cout << "storing it, str = " << string((char *)sigptr.data, sigptr.len) << endl;
|
||||
header.NVALS++;
|
||||
if (in->OutputType & OT_RID && in->InputFlags == 1) { // hack that indicates old GetSignature behavior
|
||||
const OldGetSigParams *oldParams;
|
||||
uint64_t *outRid;
|
||||
oldParams = reinterpret_cast<const OldGetSigParams *>(in->tokens);
|
||||
uint32_t newlen = header.NBYTES + 8;
|
||||
if( newlen > out->size() )
|
||||
{
|
||||
out->resize( out->size() * SCALE_FACTOR );
|
||||
}
|
||||
outRid = (uint64_t *) &(*out)[header.NBYTES];
|
||||
// mask off the upper bit of the rid; signifies the NULL token was passed in
|
||||
*outRid = (oldParams[dict_OffsetIndex - 1].rid & 0x7fffffffffffffffLL);
|
||||
header.NBYTES += 8;
|
||||
}
|
||||
|
||||
if (in->OutputType & OT_INPUTARG && in->InputFlags == 0) {
|
||||
uint32_t newlen = header.NBYTES + sizeof(DataValue) + filter->len;
|
||||
if( newlen > out->size() )
|
||||
{
|
||||
out->resize( out->size() * SCALE_FACTOR );
|
||||
}
|
||||
outValue = reinterpret_cast<DataValue *>(&(*out)[header.NBYTES]);
|
||||
outValue->len = filter->len;
|
||||
memcpy(outValue->data, filter->data, filter->len);
|
||||
header.NBYTES += sizeof(DataValue) + filter->len;
|
||||
}
|
||||
if (in->OutputType & OT_TOKEN) {
|
||||
uint32_t newlen = header.NBYTES + sizeof(PrimToken);
|
||||
if( newlen > out->size() )
|
||||
{
|
||||
out->resize( out->size() * SCALE_FACTOR );
|
||||
}
|
||||
outToken = reinterpret_cast<PrimToken *>(&(*out)[header.NBYTES]);
|
||||
outToken->LBID = in->LBID;
|
||||
outToken->offset = currentOffsetIndex;
|
||||
outToken->len = filter->len;
|
||||
header.NBYTES += sizeof(PrimToken);
|
||||
}
|
||||
if (in->OutputType & OT_DATAVALUE) {
|
||||
uint32_t newlen = header.NBYTES + sizeof(DataValue) + sigptr.len;
|
||||
if( newlen > out->size() )
|
||||
{
|
||||
out->resize( out->size() * SCALE_FACTOR );
|
||||
}
|
||||
outValue = reinterpret_cast<DataValue *>(&(*out)[header.NBYTES]);
|
||||
outValue->len = sigptr.len;
|
||||
memcpy(outValue->data, sigptr.data, sigptr.len);
|
||||
header.NBYTES += sizeof(DataValue) + sigptr.len;
|
||||
}
|
||||
}
|
||||
no_store: ; // intentional
|
||||
}
|
||||
|
||||
if (in->OutputType & OT_AGGREGATE) {
|
||||
uint32_t newlen = header.NBYTES + 3*sizeof(uint16_t) + min.len + max.len;
|
||||
if( newlen > out->size() )
|
||||
{
|
||||
out->resize( out->size() * SCALE_FACTOR );
|
||||
}
|
||||
uint16_t *tmp16 = reinterpret_cast<uint16_t *>(&(*out)[header.NBYTES]);
|
||||
DataValue *tmpDV = reinterpret_cast<DataValue *>(&(*out)[header.NBYTES + sizeof(uint16_t)]);
|
||||
|
||||
*tmp16 = aggCount;
|
||||
tmpDV->len = min.len;
|
||||
memcpy(tmpDV->data, min.data, min.len);
|
||||
header.NBYTES += 2*sizeof(uint16_t) + min.len;
|
||||
|
||||
tmpDV = reinterpret_cast<DataValue *>(&(*out)[header.NBYTES]);
|
||||
tmpDV->len = max.len;
|
||||
memcpy(tmpDV->data, max.data, max.len);
|
||||
header.NBYTES += sizeof(uint16_t) + max.len;
|
||||
}
|
||||
|
||||
out->resize( header.NBYTES );
|
||||
|
||||
memcpy(&(*out)[0], &header, sizeof(DictOutput));
|
||||
}
|
||||
|
||||
}
|
||||
// vim:ts=4 sw=4:
|
||||
|
Reference in New Issue
Block a user