/* Copyright (C) 2014 InfiniDB, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /***************************************************************************** * $Id: column.cpp 2103 2013-06-04 17:53:38Z dcathey $ * ****************************************************************************/ #include #include //#define NDEBUG #include #include #ifndef _MSC_VER #include #else #endif using namespace std; #include using namespace boost; #include "primitiveprocessor.h" #include "messagelog.h" #include "messageobj.h" #include "we_type.h" #include "stats.h" #include "primproc.h" using namespace logging; using namespace dbbc; using namespace primitives; using namespace primitiveprocessor; using namespace execplan; namespace { inline uint64_t order_swap(uint64_t x) { uint64_t ret = (x>>56) | ((x<<40) & 0x00FF000000000000ULL) | ((x<<24) & 0x0000FF0000000000ULL) | ((x<<8) & 0x000000FF00000000ULL) | ((x>>8) & 0x00000000FF000000ULL) | ((x>>24) & 0x0000000000FF0000ULL) | ((x>>40) & 0x000000000000FF00ULL) | (x<<56); return ret; } template inline string fixChar(int64_t intval); idb_regex_t placeholderRegex; template inline int compareBlock( const void * a, const void * b ) { return ( (*(T*)a) - (*(T*)b) ); } //this function is out-of-band, we don't need to inline it void logIt(int mid, int arg1, const string& arg2=string()) { MessageLog logger(LoggingID(28)); logging::Message::Args args; Message msg(mid); args.add(arg1); if (arg2.length() > 0) args.add(arg2); msg.format(args); logger.logErrorMessage(msg); } //FIXME: what are we trying to accomplish here? It looks like we just want to count // the chars in a string arg? p_DataValue convertToPDataValue(const void* val, int W) { p_DataValue dv; string str; if (8 == W) str = fixChar<8>(*reinterpret_cast(val)); else str = reinterpret_cast(val); dv.len = static_cast(str.length()); dv.data = reinterpret_cast(val); return dv; } template inline bool colCompare_(const T& val1, const T& val2, uint8_t COP) { switch(COP) { case COMPARE_NIL: return false; case COMPARE_LT: return val1 < val2; case COMPARE_EQ: return val1 == val2; case COMPARE_LE: return val1 <= val2; case COMPARE_GT: return val1 > val2; case COMPARE_NE: return val1 != val2; case COMPARE_GE: return val1 >= val2; default: logIt(34, COP, "colCompare"); return false; // throw an exception here? } } template inline bool colCompare_(const T& val1, const T& val2, uint8_t COP, uint8_t rf) { switch(COP) { case COMPARE_NIL: return false; case COMPARE_LT: return val1 < val2 || (val1 == val2 && (rf & 0x01)); case COMPARE_LE: return val1 < val2 || (val1 == val2 && rf ^ 0x80); case COMPARE_EQ: return val1 == val2 && rf == 0; case COMPARE_NE: return val1 != val2 || rf != 0; case COMPARE_GE: return val1 > val2 || (val1 == val2 && rf ^ 0x01); case COMPARE_GT: return val1 > val2 || (val1 == val2 && (rf & 0x80)); default: logIt(34, COP, "colCompare_l"); return false; // throw an exception here? } } bool isLike(const char *val, const idb_regex_t *regex) { if (!regex) throw runtime_error("PrimitiveProcessor::isLike: Missing regular expression for LIKE operator"); #ifdef POSIX_REGEX return (regexec(®ex->regex, val, 0, NULL, 0) == 0); #else return regex_match(val, regex->regex); #endif } //@bug 1828 Like must be a string compare. inline bool colStrCompare_(uint64_t val1, uint64_t val2, uint8_t COP, uint8_t rf, const idb_regex_t* regex) { switch(COP) { case COMPARE_NIL: return false; case COMPARE_LT: return val1 < val2 || (val1 == val2 && rf != 0); case COMPARE_LE: return val1 <= val2; case COMPARE_EQ: return val1 == val2 && rf == 0; case COMPARE_NE: return val1 != val2 || rf != 0; case COMPARE_GE: return val1 > val2 || (val1 == val2 && rf == 0); case COMPARE_GT: return val1 > val2; case COMPARE_LIKE: case COMPARE_NLIKE: { /* LIKE comparisons are string comparisons so we reverse the order again. Switching the order twice is probably as efficient as evaluating a guard. */ char tmp[9]; val1 = order_swap(val1); memcpy(tmp, &val1, 8); tmp[8] = '\0'; return (COP & COMPARE_NOT ? !isLike(tmp, regex) : isLike(tmp, regex)); } default: logIt(34, COP, "colCompare_l"); return false; // throw an exception here? } } #if 0 inline bool colStrCompare_(uint64_t val1, uint64_t val2, uint8_t COP, const idb_regex_t* regex) { switch(COP) { case COMPARE_NIL: return false; case COMPARE_LT: return val1 < val2; case COMPARE_LE: return val1 <= val2; case COMPARE_EQ: return val1 == val2; case COMPARE_NE: return val1 != val2; case COMPARE_GE: return val1 >= val2; case COMPARE_GT: return val1 > val2; case COMPARE_LIKE: case COMPARE_NOT | COMPARE_LIKE: { /* LIKE comparisons are string comparisons so we reverse the order again. Switching the order twice is probably as efficient as evaluating a guard. */ char tmp[9]; val1 = order_swap(val1); memcpy(tmp, &val1, 8); tmp[8] = '\0'; return (COP & COMPARE_NOT ? !isLike(tmp, regex) : isLike(tmp, regex)); } default: logIt(34, COP, "colCompare"); return false; // throw an exception here? } } #endif template inline bool isEmptyVal(uint8_t type, const uint8_t* val8); template<> inline bool isEmptyVal<8>(uint8_t type, const uint8_t* ival) { const uint64_t* val = reinterpret_cast(ival); switch (type) { case CalpontSystemCatalog::DOUBLE: case CalpontSystemCatalog::UDOUBLE: return (joblist::DOUBLEEMPTYROW == *val); case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::VARBINARY: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: return (*val == joblist::CHAR8EMPTYROW); case CalpontSystemCatalog::UBIGINT: return (joblist::UBIGINTEMPTYROW == *val); default: break; } return (joblist::BIGINTEMPTYROW == *val); } template<> inline bool isEmptyVal<4>(uint8_t type, const uint8_t* ival) { const uint32_t* val = reinterpret_cast(ival); switch (type) { case CalpontSystemCatalog::FLOAT: case CalpontSystemCatalog::UFLOAT: return (joblist::FLOATEMPTYROW == *val); case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: return (joblist::CHAR4EMPTYROW == *val); case CalpontSystemCatalog::UINT: return (joblist::UINTEMPTYROW == *val); default: break; } return (joblist::INTEMPTYROW == *val); } template<> inline bool isEmptyVal<2>(uint8_t type, const uint8_t* ival) { const uint16_t* val = reinterpret_cast(ival); switch (type) { case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: return (joblist::CHAR2EMPTYROW == *val); case CalpontSystemCatalog::USMALLINT: return (joblist::USMALLINTEMPTYROW == *val); default: break; } return (joblist::SMALLINTEMPTYROW == *val); } template<> inline bool isEmptyVal<1>(uint8_t type, const uint8_t* ival) { const uint8_t* val = reinterpret_cast(ival); switch (type) { case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: return (*val == joblist::CHAR1EMPTYROW); case CalpontSystemCatalog::UTINYINT: return (*val == joblist::UTINYINTEMPTYROW); default: break; } return (*val == joblist::TINYINTEMPTYROW); } template inline bool isNullVal(uint8_t type, const uint8_t* val8); template<> inline bool isNullVal<8>(uint8_t type, const uint8_t* ival) { const uint64_t* val = reinterpret_cast(ival); switch (type) { case CalpontSystemCatalog::DOUBLE: case CalpontSystemCatalog::UDOUBLE: return (joblist::DOUBLENULL == *val); case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::VARBINARY: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: //@bug 339 might be a token here //TODO: what's up with the second const here? return (*val == joblist::CHAR8NULL || 0xFFFFFFFFFFFFFFFELL == *val); case CalpontSystemCatalog::UBIGINT: return (joblist::UBIGINTNULL == *val); default: break; } return (joblist::BIGINTNULL == *val); } template<> inline bool isNullVal<4>(uint8_t type, const uint8_t* ival) { const uint32_t* val = reinterpret_cast(ival); switch (type) { case CalpontSystemCatalog::FLOAT: case CalpontSystemCatalog::UFLOAT: return (joblist::FLOATNULL == *val); case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: return (joblist::CHAR4NULL == *val); case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: return (joblist::DATENULL == *val); case CalpontSystemCatalog::UINT: return (joblist::UINTNULL == *val); default: break; } return (joblist::INTNULL == *val); } template<> inline bool isNullVal<2>(uint8_t type, const uint8_t* ival) { const uint16_t* val = reinterpret_cast(ival); switch (type) { case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: return (joblist::CHAR2NULL == *val); case CalpontSystemCatalog::USMALLINT: return (joblist::USMALLINTNULL == *val); default: break; } return (joblist::SMALLINTNULL == *val); } template<> inline bool isNullVal<1>(uint8_t type, const uint8_t* ival) { const uint8_t* val = reinterpret_cast(ival); switch (type) { case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: return (*val == joblist::CHAR1NULL); case CalpontSystemCatalog::UTINYINT: return (joblist::UTINYINTNULL == *val); default: break; } return (*val == joblist::TINYINTNULL); } /* A generic isNullVal */ inline bool isNullVal(uint32_t length, uint8_t type, const uint8_t *val8) { switch (length) { case 8: return isNullVal<8>(type, val8); case 4: return isNullVal<4>(type, val8); case 2: return isNullVal<2>(type, val8); case 1: return isNullVal<1>(type, val8); }; return false; } // Set the minimum and maximum in the return header if we will be doing a block scan and // we are dealing with a type that is comparable as a 64 bit integer. Subsequent calls can then // skip this block if the value being searched is outside of the Min/Max range. inline bool isMinMaxValid(const NewColRequestHeader *in) { if (in->NVALS != 0) { return false; } else { switch (in->DataType) { case CalpontSystemCatalog::CHAR: return (in->DataSize<9); case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: return (in->DataSize<8); case CalpontSystemCatalog::TINYINT: case CalpontSystemCatalog::SMALLINT: case CalpontSystemCatalog::INT: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::BIGINT: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::UTINYINT: case CalpontSystemCatalog::USMALLINT: case CalpontSystemCatalog::UINT: case CalpontSystemCatalog::UBIGINT: return true; case CalpontSystemCatalog::DECIMAL: case CalpontSystemCatalog::UDECIMAL: return (in->DataSize <= 8); default: return false; } } } //char(8) values lose their null terminator template inline string fixChar(int64_t intval) { char chval[W + 1]; memcpy(chval, &intval, W); chval[W] = '\0'; return string(chval); } inline bool colCompare(int64_t val1, int64_t val2, uint8_t COP, uint8_t rf, int type, uint8_t width, const idb_regex_t& regex, bool isNull=false) { // cout << "comparing " << hex << val1 << " to " << val2 << endl; if (COMPARE_NIL == COP) return false; //@bug 425 added isNull condition else if ( !isNull && (type == CalpontSystemCatalog::FLOAT || type == CalpontSystemCatalog::DOUBLE)) { double dVal1, dVal2; if (type == CalpontSystemCatalog::FLOAT) { dVal1 = *((float *) &val1); dVal2 = *((float *) &val2); } else { dVal1 = *((double *) &val1); dVal2 = *((double *) &val2); } return colCompare_(dVal1, dVal2, COP); } else if ( (type == CalpontSystemCatalog::CHAR || type == CalpontSystemCatalog::VARCHAR || type == CalpontSystemCatalog::TEXT) && !isNull ) { if (!regex.used && !rf) return colCompare_(order_swap(val1), order_swap(val2), COP); else return colStrCompare_(order_swap(val1), order_swap(val2), COP, rf, ®ex); } /* isNullVal should work on the normalized value on little endian machines */ else { bool val2Null = isNullVal(width, type, (uint8_t *) &val2); if (isNull == val2Null || (val2Null && COP == COMPARE_NE)) return colCompare_(val1, val2, COP, rf); else return false; } } inline bool colCompareUnsigned(uint64_t val1, uint64_t val2, uint8_t COP, uint8_t rf, int type, uint8_t width, const idb_regex_t& regex, bool isNull=false) { // cout << "comparing unsigned" << hex << val1 << " to " << val2 << endl; if (COMPARE_NIL == COP) return false; /* isNullVal should work on the normalized value on little endian machines */ bool val2Null = isNullVal(width, type, (uint8_t *) &val2); if (isNull == val2Null || (val2Null && COP == COMPARE_NE)) return colCompare_(val1, val2, COP, rf); else return false; } inline void store(const NewColRequestHeader *in, NewColResultHeader *out, unsigned outSize, unsigned *written, uint16_t rid, const uint8_t *block8) { uint8_t* out8 = reinterpret_cast(out); if (in->OutputType & OT_RID) { #ifdef PRIM_DEBUG if (*written + 2 > outSize) { logIt(35, 1); throw logic_error("PrimitiveProcessor::store(): output buffer is too small"); } #endif out->RidFlags |= (1 << (rid >> 10)); // set the (row/1024)'th bit memcpy(&out8[*written], &rid, 2); *written += 2; } if (in->OutputType & OT_TOKEN || in->OutputType & OT_DATAVALUE) { #ifdef PRIM_DEBUG if (*written + in->DataSize > outSize) { logIt(35, 2); throw logic_error("PrimitiveProcessor::store(): output buffer is too small"); } #endif void* ptr1 = &out8[*written]; const uint8_t* ptr2 = &block8[0]; switch (in->DataSize) { default: case 8: ptr2 += (rid << 3); memcpy(ptr1, ptr2, 8); break; case 4: ptr2 += (rid << 2); memcpy(ptr1, ptr2, 4); break; case 2: ptr2 += (rid << 1); memcpy(ptr1, ptr2, 2); break; case 1: ptr2 += (rid << 0); memcpy(ptr1, ptr2, 1); break; } *written += in->DataSize; } out->NVALS++; } template inline uint64_t nextUnsignedColValue(int type, const uint16_t *ridArray, int NVALS, int *index, bool *done, bool *isNull, bool *isEmpty, uint16_t *rid, uint8_t OutputType, uint8_t *val8, unsigned itemsPerBlk) { const uint8_t* vp = 0; if (ridArray == NULL) { while (static_cast(*index) < itemsPerBlk && isEmptyVal(type, &val8[*index*W]) && (OutputType & OT_RID)) { (*index)++; } if (static_cast(*index) >= itemsPerBlk) { *done = true; return 0; } vp = &val8[*index*W]; *isNull = isNullVal(type, vp); *isEmpty = isEmptyVal(type, vp); *rid = (*index)++; } else { while (*index < NVALS && isEmptyVal(type, &val8[ridArray[*index] * W])) { (*index)++; } if (*index >= NVALS) { *done = true; return 0; } vp = &val8[ridArray[*index] * W]; *isNull = isNullVal(type, vp); *isEmpty = isEmptyVal(type, vp); *rid = ridArray[(*index)++]; } // at this point, nextRid is the index to return, and index is... // if RIDs are not specified, nextRid + 1, // if RIDs are specified, it's the next index in the rid array. //Bug 838, tinyint null problem switch (W) { case 1: return reinterpret_cast (val8)[*rid]; case 2: return reinterpret_cast(val8)[*rid]; case 4: return reinterpret_cast(val8)[*rid]; case 8: return reinterpret_cast(val8)[*rid]; default: logIt(33, W); #ifdef PRIM_DEBUG throw logic_error("PrimitiveProcessor::nextColValue() bad width"); #endif return -1; } } template inline int64_t nextColValue(int type, const uint16_t *ridArray, int NVALS, int *index, bool *done, bool *isNull, bool *isEmpty, uint16_t *rid, uint8_t OutputType, uint8_t *val8, unsigned itemsPerBlk) { const uint8_t* vp = 0; if (ridArray == NULL) { while (static_cast(*index) < itemsPerBlk && isEmptyVal(type, &val8[*index*W]) && (OutputType & OT_RID)) { (*index)++; } if (static_cast(*index) >= itemsPerBlk) { *done = true; return 0; } vp = &val8[*index*W]; *isNull = isNullVal(type, vp); *isEmpty = isEmptyVal(type, vp); *rid = (*index)++; } else { while (*index < NVALS && isEmptyVal(type, &val8[ridArray[*index] * W])) { (*index)++; } if (*index >= NVALS) { *done = true; return 0; } vp = &val8[ridArray[*index] * W]; *isNull = isNullVal(type, vp); *isEmpty = isEmptyVal(type, vp); *rid = ridArray[(*index)++]; } // at this point, nextRid is the index to return, and index is... // if RIDs are not specified, nextRid + 1, // if RIDs are specified, it's the next index in the rid array. //Bug 838, tinyint null problem switch (W) { case 1: return reinterpret_cast (val8)[*rid]; case 2: return reinterpret_cast(val8)[*rid]; case 4: #if 0 if (type == CalpontSystemCatalog::FLOAT) { // convert the float to a 64-bit type, return that w/o conversion int32_t* val32 = reinterpret_cast(val8); double dTmp; dTmp = (double) *((float *) &val32[*rid]); return *((int64_t *) &dTmp); } else { return reinterpret_cast(val8)[*rid]; } #else return reinterpret_cast(val8)[*rid]; #endif case 8: return reinterpret_cast(val8)[*rid]; default: logIt(33, W); #ifdef PRIM_DEBUG throw logic_error("PrimitiveProcessor::nextColValue() bad width"); #endif return -1; } } // done should be init'd to false and // index should be init'd to 0 on the first call // done == true when there are no more elements to return. inline uint64_t nextUnsignedColValueHelper(int type, int width, const uint16_t *ridArray, int NVALS, int *index, bool *done, bool *isNull, bool *isEmpty, uint16_t *rid, uint8_t OutputType, uint8_t *val8, unsigned itemsPerBlk) { switch (width) { case 8: return nextUnsignedColValue<8>(type, ridArray, NVALS, index, done, isNull, isEmpty, rid, OutputType, val8, itemsPerBlk); case 4: return nextUnsignedColValue<4>(type, ridArray, NVALS, index, done, isNull, isEmpty, rid, OutputType, val8, itemsPerBlk); case 2: return nextUnsignedColValue<2>(type, ridArray, NVALS, index, done, isNull, isEmpty, rid, OutputType, val8, itemsPerBlk); case 1: return nextUnsignedColValue<1>(type, ridArray, NVALS, index, done, isNull, isEmpty, rid, OutputType, val8, itemsPerBlk); default: idbassert(0); } /*NOTREACHED*/ return 0; } // done should be init'd to false and // index should be init'd to 0 on the first call // done == true when there are no more elements to return. inline int64_t nextColValueHelper(int type, int width, const uint16_t *ridArray, int NVALS, int *index, bool *done, bool *isNull, bool *isEmpty, uint16_t *rid, uint8_t OutputType, uint8_t *val8, unsigned itemsPerBlk) { switch (width) { case 8: return nextColValue<8>(type, ridArray, NVALS, index, done, isNull, isEmpty, rid, OutputType, val8, itemsPerBlk); case 4: return nextColValue<4>(type, ridArray, NVALS, index, done, isNull, isEmpty, rid, OutputType, val8, itemsPerBlk); case 2: return nextColValue<2>(type, ridArray, NVALS, index, done, isNull, isEmpty, rid, OutputType, val8, itemsPerBlk); case 1: return nextColValue<1>(type, ridArray, NVALS, index, done, isNull, isEmpty, rid, OutputType, val8, itemsPerBlk); default: idbassert(0); } /*NOTREACHED*/ return 0; } #if 0 inline void p_Col_noprid(const NewColRequestHeader *in, NewColResultHeader *out, unsigned outSize, unsigned *written, int* block) { int argIndex, argOffset; uint16_t rid; const ColArgs *args; const uint8_t *in8 = reinterpret_cast(in); int64_t argVal, colVal; uint64_t uargVal, ucolVal; int8_t *val8 = reinterpret_cast(block); int16_t *val16 = reinterpret_cast(block); int32_t *val32 = reinterpret_cast(block); int64_t *val64 = reinterpret_cast(block); uint8_t *uval8 = reinterpret_cast(block); uint16_t *uval16 = reinterpret_cast(block); uint32_t *uval32 = reinterpret_cast(block); uint64_t *uval64 = reinterpret_cast(block); placeholderRegex.used = false; //cout << "NOPRID" << endl; for (argIndex = 0; argIndex < in->NVALS; argIndex++) { argOffset = sizeof(NewColRequestHeader) + (argIndex * (sizeof(ColArgs) + sizeof(int16_t) + in->DataSize)); args = reinterpret_cast(&in8[argOffset]); rid = *reinterpret_cast(&in8[argOffset + sizeof(ColArgs) + in->DataSize]); if (isUnsigned((CalpontSystemCatalog::ColDataType)in->DataType)) { switch (in->DataSize) { case 1: uargVal = *reinterpret_cast(args->val[0]); ucolVal = uval8[rid]; break; case 2: uargVal = *reinterpret_cast(args->val); ucolVal = uval16[rid]; break; case 4: uargVal = *reinterpret_cast(args->val); ucolVal = uval32[rid]; break; case 8: uargVal = *reinterpret_cast(args->val); ucolVal = uval64[rid]; break; default: logIt(33, in->DataSize); #ifdef PRIM_DEBUG throw logic_error("PrimitiveProcessor::p_Col_noprid(): bad width"); #endif return; } if (colCompare(ucolVal, uargVal, args->COP, args->rf, in->DataType, in->DataSize, placeholderRegex)) store(in, out, outSize, written, rid, reinterpret_cast(block)); } else { switch (in->DataSize) { case 1: argVal = args->val[0]; colVal = val8[rid]; break; case 2: argVal = *reinterpret_cast(args->val); colVal = val16[rid]; break; case 4: argVal = *reinterpret_cast(args->val); colVal = val32[rid]; break; case 8: argVal = *reinterpret_cast(args->val); colVal = val64[rid]; break; default: logIt(33, in->DataSize); #ifdef PRIM_DEBUG throw logic_error("PrimitiveProcessor::p_Col_noprid(): bad width"); #endif return; } if (colCompare(colVal, argVal, args->COP, args->rf, in->DataType, in->DataSize, placeholderRegex)) store(in, out, outSize, written, rid, reinterpret_cast(block)); } } } #endif template inline void p_Col_ridArray(NewColRequestHeader *in, NewColResultHeader *out, unsigned outSize, unsigned *written, int* block, Stats* fStatsPtr, unsigned itemsPerBlk, boost::shared_ptr parsedColumnFilter) { uint16_t *ridArray=0; uint8_t *in8 = reinterpret_cast(in); const uint8_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + W; placeholderRegex.used = false; if (in->NVALS>0) ridArray = reinterpret_cast(&in8[sizeof(NewColRequestHeader) + (in->NOPS * filterSize)]); if (ridArray && 1 == in->sort ) { qsort(ridArray, in->NVALS, sizeof(uint16_t), compareBlock); if (fStatsPtr) #ifdef _MSC_VER fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'O'); #else fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'O'); #endif } // Set boolean indicating whether to capture the min and max values. out->ValidMinMax = isMinMaxValid(in); if (out->ValidMinMax) { if (isUnsigned((CalpontSystemCatalog::ColDataType)in->DataType)) { out->Min = static_cast(numeric_limits::max()); out->Max = 0; } else { out->Min = numeric_limits::max(); out->Max = numeric_limits::min(); } } else { out->Min = 0; out->Max = 0; } const ColArgs *args=NULL; int64_t val=0; uint64_t uval=0; int nextRidIndex=0, argIndex=0; bool done=false, cmp=false, isNull=false, isEmpty=false; uint16_t rid=0; prestored_set_t::const_iterator it; int64_t* std_argVals = (int64_t*)alloca(in->NOPS * sizeof(int64_t)); uint8_t* std_cops = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t)); uint8_t* std_rfs = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t)); int64_t *argVals = NULL; uint64_t *uargVals = NULL; uint8_t *cops = NULL; uint8_t *rfs = NULL; scoped_array std_regex; idb_regex_t* regex = NULL; uint8_t likeOps = 0; // no pre-parsed column filter is set, parse the filter in the message if (parsedColumnFilter.get() == NULL) { std_regex.reset(new idb_regex_t[in->NOPS]); regex = &(std_regex[0]); if (isUnsigned((CalpontSystemCatalog::ColDataType)in->DataType)) { uargVals = reinterpret_cast(std_argVals); cops = std_cops; rfs = std_rfs; for (argIndex = 0; argIndex < in->NOPS; argIndex++) { args = reinterpret_cast(&in8[sizeof(NewColRequestHeader) + (argIndex * filterSize)]); cops[argIndex] = args->COP; rfs[argIndex] = args->rf; switch (W) { case 1: uargVals[argIndex] = *reinterpret_cast(args->val); break; case 2: uargVals[argIndex] = *reinterpret_cast(args->val); break; case 4: uargVals[argIndex] = *reinterpret_cast(args->val); break; case 8: uargVals[argIndex] = *reinterpret_cast(args->val); break; } regex[argIndex].used = false; } } else { argVals = std_argVals; cops = std_cops; rfs = std_rfs; for (argIndex = 0; argIndex < in->NOPS; argIndex++) { args = reinterpret_cast(&in8[sizeof(NewColRequestHeader) + (argIndex * filterSize)]); cops[argIndex] = args->COP; rfs[argIndex] = args->rf; switch (W) { case 1: argVals[argIndex] = args->val[0]; break; case 2: argVals[argIndex] = *reinterpret_cast(args->val); break; case 4: #if 0 if (in->DataType == CalpontSystemCatalog::FLOAT) { double dTmp; dTmp = (double) *((const float *) args->val); argVals[argIndex] = *((int64_t *) &dTmp); } else argVals[argIndex] = *reinterpret_cast(args->val); #else argVals[argIndex] = *reinterpret_cast(args->val); #endif break; case 8: argVals[argIndex] = *reinterpret_cast(args->val); break; } if (COMPARE_LIKE & args->COP) { p_DataValue dv = convertToPDataValue(&argVals[argIndex], W); int err = PrimitiveProcessor::convertToRegexp(®ex[argIndex], &dv); if (err) { throw runtime_error("PrimitiveProcessor::p_Col_ridarray(): Could not create regular expression for LIKE operator"); } ++likeOps; } else regex[argIndex].used = false; } } } // we have a pre-parsed filter, and it's in the form of op and value arrays else if (parsedColumnFilter->columnFilterMode == TWO_ARRAYS) { argVals = parsedColumnFilter->prestored_argVals.get(); uargVals = reinterpret_cast(parsedColumnFilter->prestored_argVals.get()); cops = parsedColumnFilter->prestored_cops.get(); rfs = parsedColumnFilter->prestored_rfs.get(); regex = parsedColumnFilter->prestored_regex.get(); likeOps = parsedColumnFilter->likeOps; } // else we have a pre-parsed filter, and it's an unordered set for quick == comparisons if (isUnsigned((CalpontSystemCatalog::ColDataType)in->DataType)) { uval = nextUnsignedColValue(in->DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull, &isEmpty, &rid, in->OutputType, reinterpret_cast(block), itemsPerBlk); } else { val = nextColValue(in->DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull, &isEmpty, &rid, in->OutputType, reinterpret_cast(block), itemsPerBlk); } while (!done) { if (cops == NULL) // implies parsedColumnFilter && columnFilterMode == SET { /* bug 1920: ignore NULLs in the set and in the column data */ if (!(isNull && in->BOP == BOP_AND)) { if (isUnsigned((CalpontSystemCatalog::ColDataType)in->DataType)) { it = parsedColumnFilter->prestored_set->find(*reinterpret_cast(&uval)); } else { it = parsedColumnFilter->prestored_set->find(val); } if (in->BOP == BOP_OR) { // assume COP == COMPARE_EQ if (it != parsedColumnFilter->prestored_set->end()) { store(in, out, outSize, written, rid, reinterpret_cast(block)); } } else if (in->BOP == BOP_AND) { // assume COP == COMPARE_NE if (it == parsedColumnFilter->prestored_set->end()) { store(in, out, outSize, written, rid, reinterpret_cast(block)); } } } } else { for (argIndex = 0; argIndex < in->NOPS; argIndex++) { if (isUnsigned((CalpontSystemCatalog::ColDataType)in->DataType)) { cmp = colCompareUnsigned(uval, uargVals[argIndex], cops[argIndex], rfs[argIndex], in->DataType, W, regex[argIndex], isNull); } else { cmp = colCompare(val, argVals[argIndex], cops[argIndex], rfs[argIndex], in->DataType, W, regex[argIndex], isNull); } if (in->NOPS == 1) { if (cmp == true) { store(in, out, outSize, written, rid, reinterpret_cast(block)); } break; } else if (in->BOP == BOP_AND && cmp == false) { break; } else if (in->BOP == BOP_OR && cmp == true) { store(in, out, outSize, written, rid, reinterpret_cast(block)); break; } } if ((argIndex == in->NOPS && in->BOP == BOP_AND) || in->NOPS == 0) { store(in, out, outSize, written, rid, reinterpret_cast(block)); } } // Set the min and max if necessary. Ignore nulls. if (out->ValidMinMax && !isNull && !isEmpty) { if ((in->DataType == CalpontSystemCatalog::CHAR || in->DataType == CalpontSystemCatalog::VARCHAR || in->DataType == CalpontSystemCatalog::BLOB || in->DataType == CalpontSystemCatalog::TEXT ) && 1 < W) { if (colCompare(out->Min, val, COMPARE_GT, false, in->DataType, W, placeholderRegex)) out->Min = val; if (colCompare(out->Max, val, COMPARE_LT, false, in->DataType, W, placeholderRegex)) out->Max = val; } else if (isUnsigned((CalpontSystemCatalog::ColDataType)in->DataType)) { if (static_cast(out->Min) > uval) out->Min = static_cast(uval); if (static_cast(out->Max) < uval) out->Max = static_cast(uval);; } else { if (out->Min > val) out->Min = val; if (out->Max < val) out->Max = val; } } if (isUnsigned((CalpontSystemCatalog::ColDataType)in->DataType)) { uval = nextUnsignedColValue(in->DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull, &isEmpty, &rid, in->OutputType, reinterpret_cast(block), itemsPerBlk); } else { val = nextColValue(in->DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull, &isEmpty, &rid, in->OutputType, reinterpret_cast(block), itemsPerBlk); } } if (fStatsPtr) #ifdef _MSC_VER fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'K'); #else fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'K'); #endif } } //namespace anon namespace primitives { void PrimitiveProcessor::p_Col(NewColRequestHeader *in, NewColResultHeader *out, unsigned outSize, unsigned *written) { memcpy(out, in, sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader)); out->NVALS = 0; out->LBID = in->LBID; out->ism.Command = COL_RESULTS; out->OutputType = in->OutputType; out->RidFlags = 0; *written = sizeof(NewColResultHeader); unsigned itemsPerBlk = 0; if (logicalBlockMode) itemsPerBlk = BLOCK_SIZE; else itemsPerBlk = BLOCK_SIZE/in->DataSize; //...Initialize I/O counts; out->CacheIO = 0; out->PhysicalIO = 0; #if 0 // short-circuit the actual block scan for testing if (out->LBID >= 802816) { out->ValidMinMax = false; out->Min = 0; out->Max = 0; return; } #endif if (fStatsPtr) #ifdef _MSC_VER fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'B'); #else fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'B'); #endif switch (in->DataSize) { case 8: p_Col_ridArray<8>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter); break; case 4: p_Col_ridArray<4>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter); break; case 2: p_Col_ridArray<2>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter); break; case 1: p_Col_ridArray<1>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter); break; default: idbassert(0); break; } if (fStatsPtr) #ifdef _MSC_VER fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'C'); #else fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'C'); #endif } boost::shared_ptr parseColumnFilter (const uint8_t *filterString, uint32_t colWidth, uint32_t colType, uint32_t filterCount, uint32_t BOP) { boost::shared_ptr ret; uint32_t argIndex; const ColArgs *args; bool convertToSet = true; if (filterCount == 0) return ret; ret.reset(new ParsedColumnFilter()); ret->columnFilterMode = TWO_ARRAYS; ret->prestored_argVals.reset(new int64_t[filterCount]); ret->prestored_cops.reset(new uint8_t[filterCount]); ret->prestored_rfs.reset(new uint8_t[filterCount]); ret->prestored_regex.reset(new idb_regex_t[filterCount]); /* for (unsigned ii = 0; ii < filterCount; ii++) { ret->prestored_argVals[ii] = 0; ret->prestored_cops[ii] = 0; ret->prestored_rfs[ii] = 0; ret->prestored_regex[ii].used = 0; } */ const uint8_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + colWidth; /* Decide which structure to use. I think the only cases where we can use the set are when NOPS > 1, BOP is OR, and every COP is ==, and when NOPS > 1, BOP is AND, and every COP is !=. Parse the filter predicates and insert them into argVals and cops. If there were no predicates that violate the condition for using a set, insert argVals into a set. */ if (filterCount == 1) convertToSet = false; for (argIndex = 0; argIndex < filterCount; argIndex++) { args = reinterpret_cast(filterString + (argIndex * filterSize)); ret->prestored_cops[argIndex] = args->COP; ret->prestored_rfs[argIndex] = args->rf; if ((BOP == BOP_OR && args->COP != COMPARE_EQ) || (BOP == BOP_AND && args->COP != COMPARE_NE) || (args->COP == COMPARE_NIL)) convertToSet = false; if (isUnsigned((CalpontSystemCatalog::ColDataType)colType)) { switch (colWidth) { case 1: ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); break; case 2: ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); break; case 4: ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); break; case 8: ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); break; } } else { switch (colWidth) { case 1: ret->prestored_argVals[argIndex] = args->val[0]; break; case 2: ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); break; case 4: #if 0 if (colType == CalpontSystemCatalog::FLOAT) { double dTmp; dTmp = (double) *((const float *) args->val); ret->prestored_argVals[argIndex] = *((int64_t *) &dTmp); } else ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); #else ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); #endif break; case 8: ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); break; } } // cout << "inserted* " << hex << ret->prestored_argVals[argIndex] << dec << // " COP = " << (int) ret->prestored_cops[argIndex] << endl; if (COMPARE_LIKE & args->COP) { p_DataValue dv = convertToPDataValue(&ret->prestored_argVals[argIndex], colWidth); int err = PrimitiveProcessor::convertToRegexp(&ret->prestored_regex[argIndex], &dv); if (err) { throw runtime_error("PrimitiveProcessor::parseColumnFilter(): Could not create regular expression for LIKE operator"); } ++ret->likeOps; } else { ret->prestored_regex[argIndex].used = false; } } if (convertToSet) { ret->columnFilterMode = UNORDERED_SET; ret->prestored_set.reset(new prestored_set_t()); // @bug 2584, use COMPARE_NIL for "= null" to allow "is null" in OR expression for (argIndex = 0; argIndex < filterCount; argIndex++) if (ret->prestored_rfs[argIndex] == 0) ret->prestored_set->insert(ret->prestored_argVals[argIndex]); } return ret; } } // namespace primitives // vim:ts=4 sw=4: