diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index b4034c732..a6ab67d59 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -188,6 +188,7 @@ LONGTEXT {return LONGTEXT;} BOOL {return BOOL;} BOOLEAN {return BOOLEAN;} MEDIUMINT {return MEDIUMINT;} +BINARY {return BINARY;} \n { lineno++;} diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index f95f3e3fb..510c7a3b0 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -112,7 +112,7 @@ MIN_ROWS MODIFY NO NOT NULL_TOK NUMBER NUMERIC ON PARTIAL PRECISION PRIMARY REFERENCES RENAME RESTRICT SET SMALLINT TABLE TEXT TINYBLOB TINYTEXT TINYINT TO UNIQUE UNSIGNED UPDATE USER SESSION_USER SYSTEM_USER VARCHAR VARBINARY VARYING WITH ZONE DOUBLE IDB_FLOAT REAL CHARSET COLLATE IDB_IF EXISTS CHANGE TRUNCATE -BOOL BOOLEAN MEDIUMINT TIMESTAMP +BOOL BOOLEAN MEDIUMINT TIMESTAMP BINARY %token DQ_IDENT IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE TIME @@ -131,6 +131,7 @@ BOOL BOOLEAN MEDIUMINT TIMESTAMP %type ata_rename_table %type character_string_type %type binary_string_type +%type fixed_binary_string_type %type blob_type %type text_type %type check_constraint_def @@ -747,6 +748,7 @@ opt_column_collate: data_type: character_string_type opt_column_charset opt_column_collate | binary_string_type + | fixed_binary_string_type | numeric_type | datetime_type | blob_type @@ -918,6 +920,14 @@ binary_string_type: } ; +fixed_binary_string_type: + BINARY '(' ICONST ')' + { + $$ = new ColumnType(DDL_BINARY); + $$->fLength = atoi($3); + } + ; + blob_type: BLOB '(' ICONST ')' { diff --git a/dbcon/ddlpackage/ddlpkg.h b/dbcon/ddlpackage/ddlpkg.h index 0c7edd389..821efa440 100644 --- a/dbcon/ddlpackage/ddlpkg.h +++ b/dbcon/ddlpackage/ddlpkg.h @@ -238,6 +238,7 @@ enum DDL_DATATYPES DDL_TEXT, DDL_TIME, DDL_TIMESTAMP, + DDL_BINARY, DDL_INVALID_DATATYPE }; @@ -276,7 +277,8 @@ const std::string DDLDatatypeString[] = "unsigned-numeric", "text", "time", - "timestamp" + "timestamp", + "binary", "" }; diff --git a/dbcon/ddlpackageproc/altertableprocessor.cpp b/dbcon/ddlpackageproc/altertableprocessor.cpp index 743779c68..1e0fa76e2 100644 --- a/dbcon/ddlpackageproc/altertableprocessor.cpp +++ b/dbcon/ddlpackageproc/altertableprocessor.cpp @@ -247,6 +247,11 @@ bool typesAreSame(const CalpontSystemCatalog::ColType& colType, const ColumnType break; + case (CalpontSystemCatalog::BINARY): + if (newType.fType == DDL_BINARY && colType.colWidth == newType.fLength) return true; + + break; + default: break; } diff --git a/dbcon/ddlpackageproc/ddlpackageprocessor.cpp b/dbcon/ddlpackageproc/ddlpackageprocessor.cpp index 0419fc70e..b26392b0c 100644 --- a/dbcon/ddlpackageproc/ddlpackageprocessor.cpp +++ b/dbcon/ddlpackageproc/ddlpackageprocessor.cpp @@ -251,6 +251,10 @@ execplan::CalpontSystemCatalog::ColDataType DDLPackageProcessor::convertDataType case ddlpackage::DDL_TEXT: colDataType = CalpontSystemCatalog::TEXT; break; + + case ddlpackage::DDL_BINARY: + colDataType = CalpontSystemCatalog::BINARY; + break; default: throw runtime_error("Unsupported datatype!"); diff --git a/dbcon/execplan/calpontsystemcatalog.cpp b/dbcon/execplan/calpontsystemcatalog.cpp index 042abcc04..02c72c573 100644 --- a/dbcon/execplan/calpontsystemcatalog.cpp +++ b/dbcon/execplan/calpontsystemcatalog.cpp @@ -226,6 +226,10 @@ const string colDataTypeToString(CalpontSystemCatalog::ColDataType cdt) return "udouble"; break; + case CalpontSystemCatalog::BINARY: + return "binary"; + break; + default: break; } diff --git a/dbcon/execplan/calpontsystemcatalog.h b/dbcon/execplan/calpontsystemcatalog.h index e59663375..b89861b07 100644 --- a/dbcon/execplan/calpontsystemcatalog.h +++ b/dbcon/execplan/calpontsystemcatalog.h @@ -170,7 +170,8 @@ public: NUM_OF_COL_DATA_TYPE, /* NEW TYPES ABOVE HERE */ LONGDOUBLE, /* @bug3241, dev and variance calculation only */ STRINT, /* @bug3532, string as int for fast comparison */ - UNDEFINED /*!< Undefined - used in UDAF API */ + UNDEFINED, /*!< Undefined - used in UDAF API */ + BINARY, /*!< BINARY type */ }; /** the set of column constraint types @@ -1212,6 +1213,13 @@ inline bool isNull(int64_t val, const execplan::CalpontSystemCatalog::ColType& c break; } + case execplan::CalpontSystemCatalog::BINARY: + { + ret = false; + + break; + } + default: break; } diff --git a/dbcon/execplan/predicateoperator.cpp b/dbcon/execplan/predicateoperator.cpp index 3f4aee1f2..02dc3a710 100644 --- a/dbcon/execplan/predicateoperator.cpp +++ b/dbcon/execplan/predicateoperator.cpp @@ -126,6 +126,7 @@ bool PredicateOperator::operator!=(const TreeNode* t) const } //FIXME: VARBINARY??? +//FIXME: BINARY??? void PredicateOperator::setOpType(Type& l, Type& r) { fOperationType = l; // Default to left side. Modify as needed. diff --git a/dbcon/execplan/simplecolumn.cpp b/dbcon/execplan/simplecolumn.cpp index 8110c5296..a58ad66b7 100644 --- a/dbcon/execplan/simplecolumn.cpp +++ b/dbcon/execplan/simplecolumn.cpp @@ -690,6 +690,13 @@ void SimpleColumn::evaluate(Row& row, bool& isNull) break; } + case CalpontSystemCatalog::BINARY: + + { + fResult.strVal = row.getBinaryField(fInputIndex); + break; + } + default: // treat as int64 { fResult.intVal = row.getUintField<8>(fInputIndex); diff --git a/dbcon/execplan/simplecolumn_decimal.h b/dbcon/execplan/simplecolumn_decimal.h index c49b597c5..7a5acbb56 100644 --- a/dbcon/execplan/simplecolumn_decimal.h +++ b/dbcon/execplan/simplecolumn_decimal.h @@ -25,6 +25,7 @@ #ifndef SIMPLECOLUMNDECIMAL_H #define SIMPLECOLUMNDECIMAL_H +#include #include #include @@ -141,7 +142,8 @@ void SimpleColumn_Decimal::setNullVal() case 1: fNullVal = joblist::TINYINTNULL; break; - + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; default: fNullVal = joblist::BIGINTNULL; } @@ -223,6 +225,8 @@ void SimpleColumn_Decimal::serialize(messageqcpp::ByteStream& b) const case 8: b << (ObjectReader::id_t) ObjectReader::SIMPLECOLUMN_DECIMAL8; break; + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; } SimpleColumn::serialize(b); @@ -248,6 +252,8 @@ void SimpleColumn_Decimal::unserialize(messageqcpp::ByteStream& b) case 8: ObjectReader::checkType(b, ObjectReader::SIMPLECOLUMN_DECIMAL8); break; + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << std::endl; } SimpleColumn::unserialize(b); diff --git a/dbcon/execplan/simplecolumn_int.h b/dbcon/execplan/simplecolumn_int.h index 9454bba86..4a770ec89 100644 --- a/dbcon/execplan/simplecolumn_int.h +++ b/dbcon/execplan/simplecolumn_int.h @@ -25,6 +25,7 @@ #ifndef SIMPLECOLUMNINT_H #define SIMPLECOLUMNINT_H +#include #include #include "simplecolumn.h" @@ -241,6 +242,8 @@ void SimpleColumn_INT::serialize(messageqcpp::ByteStream& b) const case 8: b << (ObjectReader::id_t) ObjectReader::SIMPLECOLUMN_INT8; break; + case 16: + std::cout << __FILE__<< ":" << __LINE__ << " Fix for 16 Bytes ?" << std::endl; } SimpleColumn::serialize(b); @@ -266,6 +269,8 @@ void SimpleColumn_INT::unserialize(messageqcpp::ByteStream& b) case 8: ObjectReader::checkType(b, ObjectReader::SIMPLECOLUMN_INT8); break; + case 16: + std::cout << __FILE__<< ":" << __LINE__ << " Fix for 16 Bytes ?" << std::endl; } SimpleColumn::unserialize(b); diff --git a/dbcon/execplan/simplecolumn_uint.h b/dbcon/execplan/simplecolumn_uint.h index 731242809..289a62463 100644 --- a/dbcon/execplan/simplecolumn_uint.h +++ b/dbcon/execplan/simplecolumn_uint.h @@ -25,6 +25,7 @@ #ifndef SIMPLECOLUMNUINT_H #define SIMPLECOLUMNUINT_H +#include #include #include "simplecolumn.h" @@ -140,7 +141,8 @@ void SimpleColumn_UINT::setNullVal() case 1: fNullVal = joblist::UTINYINTNULL; break; - + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << std::endl; default: fNullVal = joblist::UBIGINTNULL; } @@ -241,6 +243,8 @@ void SimpleColumn_UINT::serialize(messageqcpp::ByteStream& b) const case 8: b << (ObjectReader::id_t) ObjectReader::SIMPLECOLUMN_UINT8; break; + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << std::endl; } SimpleColumn::serialize(b); @@ -266,6 +270,8 @@ void SimpleColumn_UINT::unserialize(messageqcpp::ByteStream& b) case 8: ObjectReader::checkType(b, ObjectReader::SIMPLECOLUMN_UINT8); break; + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << std::endl; } SimpleColumn::unserialize(b); diff --git a/dbcon/execplan/simplefilter.cpp b/dbcon/execplan/simplefilter.cpp index 3fcc8fcd6..f394f5e51 100644 --- a/dbcon/execplan/simplefilter.cpp +++ b/dbcon/execplan/simplefilter.cpp @@ -219,6 +219,7 @@ const string SimpleFilter::data() const fRhs->resultType().colDataType == CalpontSystemCatalog::DATE || fRhs->resultType().colDataType == CalpontSystemCatalog::DATETIME || fRhs->resultType().colDataType == CalpontSystemCatalog::TIMESTAMP || + fRhs->resultType().colDataType == CalpontSystemCatalog::BINARY || fRhs->resultType().colDataType == CalpontSystemCatalog::TIME)) rhs = "'" + SimpleFilter::escapeString(fRhs->data()) + "'"; else @@ -233,6 +234,7 @@ const string SimpleFilter::data() const fLhs->resultType().colDataType == CalpontSystemCatalog::DATE || fLhs->resultType().colDataType == CalpontSystemCatalog::TIME || fLhs->resultType().colDataType == CalpontSystemCatalog::TIMESTAMP || + fLhs->resultType().colDataType == CalpontSystemCatalog::BINARY || fLhs->resultType().colDataType == CalpontSystemCatalog::DATETIME)) lhs = "'" + SimpleFilter::escapeString(fLhs->data()) + "'"; else diff --git a/dbcon/execplan/treenode.h b/dbcon/execplan/treenode.h index 5fab31a02..e50c3f30f 100644 --- a/dbcon/execplan/treenode.h +++ b/dbcon/execplan/treenode.h @@ -741,6 +741,10 @@ inline const std::string& TreeNode::getStrVal(const std::string& timeZone) break; } + case CalpontSystemCatalog::BINARY: + { + break; + } default: throw logging::InvalidConversionExcept("TreeNode::getStrVal: Invalid conversion."); } @@ -1078,6 +1082,7 @@ inline IDB_Decimal TreeNode::getDecimalVal() case CalpontSystemCatalog::VARBINARY: case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::BINARY: throw logging::InvalidConversionExcept("TreeNode::getDecimalVal: non-support conversion from binary string"); case CalpontSystemCatalog::BIGINT: diff --git a/dbcon/execplan/windowfunctioncolumn.cpp b/dbcon/execplan/windowfunctioncolumn.cpp index b4ebd8b42..f14d9656f 100644 --- a/dbcon/execplan/windowfunctioncolumn.cpp +++ b/dbcon/execplan/windowfunctioncolumn.cpp @@ -478,7 +478,8 @@ void WindowFunctionColumn::evaluate(Row& row, bool& isNull) fResult.origIntVal = row.getUintField<8>(fInputIndex); break; - + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << endl; default: if (row.equals(CPNULLSTRMARK, fInputIndex)) isNull = true; diff --git a/dbcon/joblist/batchprimitiveprocessor-jl.cpp b/dbcon/joblist/batchprimitiveprocessor-jl.cpp index 0f04c8a8e..523e9a44a 100644 --- a/dbcon/joblist/batchprimitiveprocessor-jl.cpp +++ b/dbcon/joblist/batchprimitiveprocessor-jl.cpp @@ -667,7 +667,8 @@ void BatchPrimitiveProcessorJL::getTuples(messageqcpp::ByteStream& in, columnData[j]++; pos++; break; - + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << endl; default: cout << "BPP::getTuples(): bad column width of " << colWidths[j] << endl; diff --git a/dbcon/joblist/columncommand-jl.cpp b/dbcon/joblist/columncommand-jl.cpp index 1ea451b19..b8ef18f5d 100644 --- a/dbcon/joblist/columncommand-jl.cpp +++ b/dbcon/joblist/columncommand-jl.cpp @@ -269,7 +269,8 @@ uint8_t ColumnCommandJL::getTableColumnType() case 1: return TableColumn::UINT8; - + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << endl; default: throw logic_error("ColumnCommandJL: bad column width"); } diff --git a/dbcon/joblist/jlf_common.cpp b/dbcon/joblist/jlf_common.cpp index c40425b16..d6bd3e5a6 100644 --- a/dbcon/joblist/jlf_common.cpp +++ b/dbcon/joblist/jlf_common.cpp @@ -332,6 +332,8 @@ string extractTableAlias(const SSC& sc) //------------------------------------------------------------------------------ CalpontSystemCatalog::OID isDictCol(const CalpontSystemCatalog::ColType& colType) { + if (colType.colDataType == CalpontSystemCatalog::BINARY) return 0; + if (colType.colWidth > 8) return colType.ddn.dictOID; if (colType.colDataType == CalpontSystemCatalog::VARCHAR && diff --git a/dbcon/joblist/joblisttypes.h b/dbcon/joblist/joblisttypes.h index 6d5664c57..9ad0e071c 100644 --- a/dbcon/joblist/joblisttypes.h +++ b/dbcon/joblist/joblisttypes.h @@ -83,6 +83,9 @@ const uint16_t NULL_UINT16 = USMALLINTNULL; const uint32_t NULL_UINT32 = UINTNULL; const uint64_t NULL_UINT64 = UBIGINTNULL; +const uint64_t BINARYEMPTYROW = 0; +const uint64_t BINARYNULL = 0; + const std::string CPNULLSTRMARK("_CpNuLl_"); const std::string CPSTRNOTFOUND("_CpNoTf_"); diff --git a/dbcon/joblist/lbidlist.cpp b/dbcon/joblist/lbidlist.cpp index 631e9a597..b6cc575ae 100644 --- a/dbcon/joblist/lbidlist.cpp +++ b/dbcon/joblist/lbidlist.cpp @@ -699,6 +699,8 @@ bool LBIDList::CasualPartitionPredicate(const int64_t Min, uint64_t val = *(int64_t*)MsgDataPtr; value = static_cast(val); } + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; } } else @@ -731,6 +733,8 @@ bool LBIDList::CasualPartitionPredicate(const int64_t Min, int64_t val = *(int64_t*)MsgDataPtr; value = val; } + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; } } diff --git a/dbcon/joblist/passthrucommand-jl.cpp b/dbcon/joblist/passthrucommand-jl.cpp index a11571c87..222f9e5bd 100644 --- a/dbcon/joblist/passthrucommand-jl.cpp +++ b/dbcon/joblist/passthrucommand-jl.cpp @@ -73,7 +73,12 @@ PassThruCommandJL::PassThruCommandJL(const PassThruStep& p) case 8: tableColumnType = TableColumn::UINT64; break; - + + case 16: + case 32: + tableColumnType = TableColumn::STRING; + break; + default: throw logic_error("PassThruCommandJL(): bad column width?"); } diff --git a/dbcon/joblist/pcolscan.cpp b/dbcon/joblist/pcolscan.cpp index da35ad8ce..3a9384353 100644 --- a/dbcon/joblist/pcolscan.cpp +++ b/dbcon/joblist/pcolscan.cpp @@ -178,7 +178,7 @@ pColScanStep::pColScanStep( fColType.colWidth = 8; fIsDict = true; } - else if (fColType.colWidth > 8 ) + else if (fColType.colWidth > 8 && fColType.colDataType != CalpontSystemCatalog::BINARY) { fColType.colWidth = 8; fIsDict = true; diff --git a/dbcon/joblist/pcolstep.cpp b/dbcon/joblist/pcolstep.cpp index b9809e8e0..5b07d9374 100644 --- a/dbcon/joblist/pcolstep.cpp +++ b/dbcon/joblist/pcolstep.cpp @@ -177,7 +177,7 @@ pColStep::pColStep( fColType.colWidth = 8; fIsDict = true; } - else if (fColType.colWidth > 8 ) + else if (fColType.colWidth > 8 && fColType.colDataType != CalpontSystemCatalog::BINARY ) { fColType.colWidth = 8; fIsDict = true; diff --git a/dbcon/mysql/ha_mcs_ddl.cpp b/dbcon/mysql/ha_mcs_ddl.cpp index 82ea00a17..08cfe199d 100644 --- a/dbcon/mysql/ha_mcs_ddl.cpp +++ b/dbcon/mysql/ha_mcs_ddl.cpp @@ -251,6 +251,10 @@ uint32_t convertDataType(int dataType) case ddlpackage::DDL_UNSIGNED_DOUBLE: calpontDataType = CalpontSystemCatalog::UDOUBLE; break; + + case ddlpackage::DDL_BINARY: + calpontDataType = CalpontSystemCatalog::BINARY; + break; default: throw runtime_error("Unsupported datatype!"); diff --git a/dbcon/mysql/ha_mcs_impl.cpp b/dbcon/mysql/ha_mcs_impl.cpp index 2f8cbe220..b3018be57 100644 --- a/dbcon/mysql/ha_mcs_impl.cpp +++ b/dbcon/mysql/ha_mcs_impl.cpp @@ -817,7 +817,16 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h break; } + case CalpontSystemCatalog::BINARY: + { + Field_varstring* f2 = (Field_varstring*)*f; + f2->store(row.getBinaryField(s).c_str(), 16, f2->charset()); + if ((*f)->null_ptr) + *(*f)->null_ptr &= ~(*f)->null_bit; + + break; + } default: // treat as int64 { intColVal = row.getUintField<8>(s); diff --git a/primitives/linux-port/column.cpp b/primitives/linux-port/column.cpp index a8b3dc838..243858157 100644 --- a/primitives/linux-port/column.cpp +++ b/primitives/linux-port/column.cpp @@ -273,6 +273,21 @@ inline bool colStrCompare_(uint64_t val1, uint64_t val2, uint8_t COP, const idb_ template inline bool isEmptyVal(uint8_t type, const uint8_t* val8); +template<> +inline bool isEmptyVal<32>(uint8_t type, const uint8_t* ival) // For BINARY +{ + const uint64_t* val = reinterpret_cast(ival); + return ((val[0] == joblist::BINARYEMPTYROW) && (val[1] == joblist::BINARYEMPTYROW) + && (val[2] == joblist::BINARYEMPTYROW) && (val[3] == joblist::BINARYEMPTYROW)); +} + +template<> +inline bool isEmptyVal<16>(uint8_t type, const uint8_t* ival) // For BINARY +{ + const uint64_t* val = reinterpret_cast(ival); + return ((val[0] == joblist::BINARYEMPTYROW) && (val[1] == joblist::BINARYEMPTYROW)); +} + template<> inline bool isEmptyVal<8>(uint8_t type, const uint8_t* ival) { @@ -394,6 +409,21 @@ inline bool isEmptyVal<1>(uint8_t type, const uint8_t* ival) template inline bool isNullVal(uint8_t type, const uint8_t* val8); +template<> +inline bool isNullVal<16>(uint8_t type, const uint8_t* ival) // For BINARY +{ + const uint64_t* val = reinterpret_cast(ival); + return ((val[0] == joblist::BINARYNULL) && (val[1] == joblist::BINARYNULL)); +} + +template<> +inline bool isNullVal<32>(uint8_t type, const uint8_t* ival) // For BINARY +{ + const uint64_t* val = reinterpret_cast(ival); + return ((val[0] == joblist::BINARYNULL) && (val[1] == joblist::BINARYNULL) + && (val[2] == joblist::BINARYNULL) && (val[3] == joblist::BINARYNULL)); +} + template<> inline bool isNullVal<8>(uint8_t type, const uint8_t* ival) { @@ -521,6 +551,12 @@ inline bool isNullVal(uint32_t length, uint8_t type, const uint8_t* val8) { switch (length) { + case 32: + return isNullVal<32>(type, val8); + + case 16: + return isNullVal<16>(type, val8); + case 8: return isNullVal<8>(type, val8); @@ -703,6 +739,16 @@ inline void store(const NewColRequestHeader* in, switch (in->DataSize) { + case 32: + ptr2 += (rid << 5); + memcpy(ptr1, ptr2, 32); + break; + + case 16: + ptr2 += (rid << 4); + memcpy(ptr1, ptr2, 16); + break; + default: case 8: ptr2 += (rid << 3); @@ -724,7 +770,6 @@ inline void store(const NewColRequestHeader* in, memcpy(ptr1, ptr2, 1); break; } - *written += in->DataSize; } @@ -811,6 +856,66 @@ inline uint64_t nextUnsignedColValue(int type, return -1; } } +template +inline uint8_t* nextBinColValue(int type, + const uint16_t* ridArray, + int NVALS, + int* index, + bool* done, + bool* isNull, + bool* isEmpty, + uint16_t* rid, + uint8_t OutputType, uint8_t* val8, unsigned itemsPerBlk) +{ + if (ridArray == NULL) + { + while (static_cast(*index) < itemsPerBlk && + isEmptyVal(type, &val8[*index * W]) && + (OutputType & OT_RID)) + { + (*index)++; + } + + + if (static_cast(*index) >= itemsPerBlk) + { + *done = true; + return NULL; + } + *rid = (*index)++; + } + else + { + //FIXME: not complete nor tested . How make execution flow pass here + // whe is ridArray not NULL ? fidn by id? how? + while (*index < NVALS && + isEmptyVal(type, &val8[ridArray[*index] * W])) + { + (*index)++; + } + + if (*index >= NVALS) + { + *done = true; + return NULL; + } + *rid = ridArray[(*index)++]; + } + + *isNull = isNullVal(type, val8); + *isEmpty = isEmptyVal(type, val8); + //cout << "nextUnsignedColValue index " << *index << " rowid " << *rid << endl; + // at this point, nextRid is the index to return, and index is... + // if RIDs are not specified, nextRid + 1, + // if RIDs are specified, it's the next index in the rid array. + return &val8[*rid * W]; + +#ifdef PRIM_DEBUG + throw logic_error("PrimitiveProcessor::nextColBinValue() bad width"); +#endif + return NULL; +} +} template inline int64_t nextColValue(int type, @@ -1426,6 +1531,225 @@ inline void p_Col_ridArray(NewColRequestHeader* in, #endif } +// for BINARY +template +inline void p_Col_bin_ridArray(NewColRequestHeader* in, + NewColResultHeader* out, + unsigned outSize, + unsigned* written, int* block, Stats* fStatsPtr, unsigned itemsPerBlk, + boost::shared_ptr parsedColumnFilter) +{ + uint16_t* ridArray = 0; + uint8_t* in8 = reinterpret_cast(in); + const uint8_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + W; + idb_regex_t placeholderRegex; + placeholderRegex.used = false; + + //FIXME: pCol is setting it to 8192 cause logicalBlockMode is true + if(itemsPerBlk == BLOCK_SIZE){ + itemsPerBlk = BLOCK_SIZE/W; + } + + if (in->NVALS > 0) + ridArray = reinterpret_cast(&in8[sizeof(NewColRequestHeader) + + (in->NOPS * filterSize)]); + + if (ridArray && 1 == in->sort ) + { + qsort(ridArray, in->NVALS, sizeof(uint16_t), compareBlock); + + if (fStatsPtr) +#ifdef _MSC_VER + fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'O'); + +#else + fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'O'); +#endif + } + + // Set boolean indicating whether to capture the min and max values. + out->ValidMinMax = isMinMaxValid(in); + + if (out->ValidMinMax) + { + if (isUnsigned((CalpontSystemCatalog::ColDataType)in->DataType)) + { + out->Min = static_cast(numeric_limits::max()); + out->Max = 0; + } + else + { + out->Min = numeric_limits::max(); + out->Max = numeric_limits::min(); + } + } + else + { + out->Min = 0; + out->Max = 0; + } + + typedef char binWtype [W]; + + const ColArgs* args = NULL; + int64_t val = 0; + binWtype* bval; + int nextRidIndex = 0, argIndex = 0; + bool done = false, cmp = false, isNull = false, isEmpty = false; + uint16_t rid = 0; + prestored_set_t::const_iterator it; + + binWtype* argVals = (binWtype*)alloca(in->NOPS * W); + uint8_t* std_cops = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t)); + uint8_t* std_rfs = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t)); + uint8_t* cops = NULL; + uint8_t* rfs = NULL; + + scoped_array std_regex; + idb_regex_t* regex = NULL; + uint8_t likeOps = 0; + +// no pre-parsed column filter is set, parse the filter in the message + if (parsedColumnFilter.get() == NULL) { + std_regex.reset(new idb_regex_t[in->NOPS]); + regex = &(std_regex[0]); + + cops = std_cops; + rfs = std_rfs; + + for (argIndex = 0; argIndex < in->NOPS; argIndex++) { + args = reinterpret_cast (&in8[sizeof (NewColRequestHeader) + + (argIndex * filterSize)]); + cops[argIndex] = args->COP; + rfs[argIndex] = args->rf; + + memcpy(argVals[argIndex],args->val, W); + } + + regex[argIndex].used = false; + } + + + // else we have a pre-parsed filter, and it's an unordered set for quick == comparisons + bval = (binWtype*)nextBinColValue(in->DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull, + &isEmpty, &rid, in->OutputType, reinterpret_cast(block), itemsPerBlk); + + while (!done) + { + +// if((*((uint64_t *) (bval))) != 0) +// { +// cout << "rid "<< rid << " value "; +// if(W > 16) printf("%016X%016X ",( *(((uint64_t *) (bval)) +3)),(*(((uint64_t *) (bval)) +2))); +// printf("%016X%016X ",( *(((uint64_t *) (bval)) +1)),(*((uint64_t *) (bval))) ); +// +// cout << endl; +// } + + if (cops == NULL) // implies parsedColumnFilter && columnFilterMode == SET + { + /* bug 1920: ignore NULLs in the set and in the column data */ + if (!(isNull && in->BOP == BOP_AND)) + { + + it = parsedColumnFilter->prestored_set->find(val); + + + if (in->BOP == BOP_OR) + { + // assume COP == COMPARE_EQ + if (it != parsedColumnFilter->prestored_set->end()) + { + store(in, out, outSize, written, rid, reinterpret_cast(block)); + } + } + else if (in->BOP == BOP_AND) + { + // assume COP == COMPARE_NE + if (it == parsedColumnFilter->prestored_set->end()) + { + store(in, out, outSize, written, rid, reinterpret_cast(block)); + } + } + } + } + else + { + for (argIndex = 0; argIndex < in->NOPS; argIndex++) + { + +// if((*((uint64_t *) (uval))) != 0) cout << "comparing " << dec << (*((uint64_t *) (uval))) << " to " << (*((uint64_t *) (argVals[argIndex]))) << endl; + + int val1 = memcmp(*bval, &argVals[argIndex], W); + + switch (cops[argIndex]) { + case COMPARE_NIL: + cmp = false; + break; + case COMPARE_LT: + cmp = val1 < 0; + break; + case COMPARE_EQ: + cmp = val1 == 0; + break; + case COMPARE_LE: + cmp = val1 <= 0; + break; + case COMPARE_GT: + cmp = val1 > 0; + break; + case COMPARE_NE: + cmp = val1 != 0; + break; + case COMPARE_GE: + cmp = val1 >= 0; + break; + default: + logIt(34, cops[argIndex], "colCompare"); + cmp = false; // throw an exception here? + } + +// cout << cmp << endl; + + if (in->NOPS == 1) + { + if (cmp == true) + { + store(in, out, outSize, written, rid, reinterpret_cast(block)); + } + + break; + } + else if (in->BOP == BOP_AND && cmp == false) + { + break; + } + else if (in->BOP == BOP_OR && cmp == true) + { + store(in, out, outSize, written, rid, reinterpret_cast(block)); + break; + } + } + + if ((argIndex == in->NOPS && in->BOP == BOP_AND) || in->NOPS == 0) + { + store(in, out, outSize, written, rid, reinterpret_cast(block)); + } + } + + bval = (binWtype*)nextBinColValue(in->DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull, + &isEmpty, &rid, in->OutputType, reinterpret_cast(block), itemsPerBlk); + + } + + if (fStatsPtr) +#ifdef _MSC_VER + fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'K'); + +#else + fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'K'); +#endif + } //namespace anon namespace primitives @@ -1476,6 +1800,14 @@ void PrimitiveProcessor::p_Col(NewColRequestHeader* in, NewColResultHeader* out, switch (in->DataSize) { + case 32: + p_Col_bin_ridArray<32>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter); + break; + + case 16: + p_Col_bin_ridArray<16>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter); + break; + case 8: p_Col_ridArray<8>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter); break; @@ -1578,7 +1910,9 @@ boost::shared_ptr parseColumnFilter case 8: ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); - break; + break; + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; } } else @@ -1614,6 +1948,8 @@ boost::shared_ptr parseColumnFilter case 8: ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); break; + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; } } diff --git a/primitives/linux-port/tdriver.cpp b/primitives/linux-port/tdriver.cpp index 0d7a00c9b..0d80141a5 100644 --- a/primitives/linux-port/tdriver.cpp +++ b/primitives/linux-port/tdriver.cpp @@ -39,9 +39,9 @@ #include #include "primitiveprocessor.h" +using namespace primitives; using namespace std; - int done; void alarm_handler(int sig) @@ -87,7 +87,6 @@ class PrimTest : public CppUnit::TestFixture CPPUNIT_TEST(p_IdxList_1); CPPUNIT_TEST(p_IdxList_2); - // whole block tests CPPUNIT_TEST(p_Col_1); CPPUNIT_TEST(p_Col_2); @@ -162,7 +161,11 @@ class PrimTest : public CppUnit::TestFixture // CPPUNIT_TEST(p_Dictionary_like_prefixbench_1); // CPPUNIT_TEST(p_Dictionary_like_substrbench_1); - + +// binary data type + CPPUNIT_TEST(p_Col_bin_16); + CPPUNIT_TEST(p_Col_bin_32); + CPPUNIT_TEST_SUITE_END(); private: @@ -3744,6 +3747,178 @@ public: close(fd); } + + template struct binary; + typedef binary<16> binary16; + typedef binary<32> binary32; + template + struct binary { + unsigned char data[W]; // May be ok for empty value ? + void operator=(uint64_t v) {*((uint64_t *) data) = v; memset(data + 8, 0, W - 8);} + inline uint8_t& operator[](const int index) {return *((uint8_t*) (data + index));} + inline uint64_t& uint64(const int index) {return *((uint64_t*) (data + (index << 3)));} + }; + + void p_Col_bin_16() + { + PrimitiveProcessor pp; + uint8_t input[BLOCK_SIZE], output[4 * BLOCK_SIZE], block[BLOCK_SIZE]; + NewColRequestHeader* in; + NewColResultHeader* out; + ColArgs* args; + binary16* results; + uint32_t written, i; + int fd; + binary16 tmp; + binary16* bin16 = (binary16*) block; + + for(int i = 0; i < BLOCK_SIZE/16; i++) + { + bin16[i] = 0; + } + + bin16[0].uint64(0) = 10UL; + + bin16[1].uint64(0) = 1000UL; + + bin16[3].uint64(0) = 1000UL; + bin16[3].uint64(1) = 1; + + bin16[4].uint64(0) = 256; + bin16[4].uint64(1) = 1; + + typedef char bin16_t[16]; + + *(uint64_t*)(((bin16_t*)block) + 5) = 500; + + *(uint64_t*)&((bin16_t*)block)[6] = 501; + + memset(input, 0, BLOCK_SIZE); + memset(output, 0, 4 * BLOCK_SIZE); + + in = reinterpret_cast(input); + out = reinterpret_cast(output); + args = reinterpret_cast(&input[sizeof(NewColRequestHeader)]); + + in->DataSize = sizeof(binary16); + in->DataType = execplan::CalpontSystemCatalog::BINARY; + in->OutputType = OT_DATAVALUE; + in->NOPS = 3; + in->BOP = BOP_OR; + in->NVALS = 0; + + tmp = 10; + args->COP = COMPARE_EQ; + memcpy(args->val, &tmp, in->DataSize); + args = reinterpret_cast (args->val + in->DataSize); + + args->COP = COMPARE_EQ; + tmp = 1000; + memcpy(args->val, &tmp, in->DataSize); + + args = reinterpret_cast (args->val + in->DataSize); + tmp.uint64(0) = 256; + tmp.uint64(1) = 1; + args->COP = COMPARE_EQ; + memcpy(args->val, &tmp, in->DataSize); + + pp.setBlockPtr((int*) block); + pp.p_Col(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); +// cout << "NVALS = " << out->NVALS << endl; + CPPUNIT_ASSERT_EQUAL((uint16_t)3, out->NVALS); + CPPUNIT_ASSERT_EQUAL((u_int64_t)10, results[0].uint64(0)); + CPPUNIT_ASSERT_EQUAL((u_int64_t)1000, results[1].uint64(0)); + for (i = 0; i < out->NVALS; i++) { + printf("Result %d Value %016X%016X\n",i ,results[i].uint64(1),results[i].uint64(0) ); +// CPPUNIT_ASSERT(results[i] == (uint32_t) (i < 10 ? i : i - 10 + 1001)); + } + } + + void p_Col_bin_32() + { + PrimitiveProcessor pp; + uint8_t input[2 * BLOCK_SIZE], output[8 * BLOCK_SIZE], block[BLOCK_SIZE]; + NewColRequestHeader* in; + NewColResultHeader* out; + ColArgs* args; + binary32* results; + uint32_t written, i; + int fd; + binary32 tmp; + binary32* bin32 = (binary32*) block; + + for(int i = 0; i < BLOCK_SIZE/32; i++) + { + bin32[i].uint64(0) = 0; + } + + bin32[0].uint64(0) = 10UL; + + bin32[1].uint64(0) = 1000UL; + + bin32[3].uint64(0) = 1000UL; + bin32[3].uint64(1) = 1; + + bin32[4].uint64(0) = 256; + bin32[4].uint64(1) = 254; + bin32[4].uint64(2) = 253; + bin32[4].uint64(3) = 252; + + typedef char bin32_t[32]; + + *(uint64_t*)(((bin32_t*)block) + 5) = 500; + + *(uint64_t*)&((bin32_t*)block)[6] = 501; + + memset(input, 0, BLOCK_SIZE); + memset(output, 0, 4 * BLOCK_SIZE); + + in = reinterpret_cast(input); + out = reinterpret_cast(output); + args = reinterpret_cast(&input[sizeof(NewColRequestHeader)]); + + in->DataSize = sizeof(binary32); + in->DataType = execplan::CalpontSystemCatalog::BINARY; + in->OutputType = OT_DATAVALUE; + in->NOPS = 3; + in->BOP = BOP_OR; + in->NVALS = 0; + + tmp = 10; + args->COP = COMPARE_EQ; + memcpy(args->val, &tmp, in->DataSize); + args = reinterpret_cast (args->val + in->DataSize); + + args->COP = COMPARE_EQ; + tmp = 1000; + memcpy(args->val, &tmp, in->DataSize); + + args = reinterpret_cast (args->val + in->DataSize); + tmp.uint64(0) = 256; + tmp.uint64(1) = 254; + tmp.uint64(2) = 253; + tmp.uint64(3) = 252; + + args->COP = COMPARE_EQ; + memcpy(args->val, &tmp, in->DataSize); + + pp.setBlockPtr((int*) block); + pp.p_Col(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); +// cout << "NVALS = " << out->NVALS << endl; + CPPUNIT_ASSERT_EQUAL((uint16_t)3, out->NVALS); +// CPPUNIT_ASSERT_EQUAL((u_int64_t)10, results[0].uint64(0)); +// CPPUNIT_ASSERT_EQUAL((u_int64_t)1000, results[1].uint64(0)); + for (i = 0; i < out->NVALS; i++) { + printf("Result %d Value %016X%016X%016X%016X\n",i ,results[i].uint64(3),results[i].uint64(2),results[i].uint64(1),results[i].uint64(0) ); +// CPPUNIT_ASSERT(results[i] == (uint32_t) (i < 10 ? i : i - 10 + 1001)); + } + } + + void p_Dictionary_1() { diff --git a/primitives/primproc/columncommand.cpp b/primitives/primproc/columncommand.cpp index db90b3577..7f24f6931 100644 --- a/primitives/primproc/columncommand.cpp +++ b/primitives/primproc/columncommand.cpp @@ -245,7 +245,7 @@ void ColumnCommand::issuePrimitive() bpp->pp.setParsedColumnFilter(parsedColumnFilter); else bpp->pp.setParsedColumnFilter(emptyFilter); - + bpp->pp.p_Col(primMsg, outMsg, bpp->outMsgSize, (unsigned int*)&resultSize); /* Update CP data, the PseudoColumn code should always be !_isScan. Should be safe @@ -273,6 +273,31 @@ void ColumnCommand::process_OT_BOTH() /* this is verbose and repetative to minimize the work per row */ switch (colType.colWidth) { + case 16: + for (i = 0, pos = sizeof(NewColResultHeader); i < outMsg->NVALS; ++i) + { + if (makeAbsRids) + bpp->absRids[i] = *((uint16_t*) &bpp->outputMsg[pos]) + bpp->baseRid; + + bpp->relRids[i] = *((uint16_t*) &bpp->outputMsg[pos]); + pos += 2; + // values[i] is 8 Bytes wide so coping the pointer to bpp->outputMsg[pos] and crossing fingers + // I dont know the liveness of bpp->outputMsg but also I dont know if there is other memory area I can use + values[i] = (int64_t) &bpp->outputMsg[pos]; + +// cout<< "CC: BIN16 " << i << " " +// << hex +// << *((int64_t*)values[i]) +// << " " +// << *(((int64_t*)values[i]) +1) +// << endl; + pos += 16; + } + + break; + + + case 8: for (i = 0, pos = sizeof(NewColResultHeader); i < outMsg->NVALS; ++i) { @@ -346,6 +371,14 @@ void ColumnCommand::process_OT_DATAVALUE() // cout << "rid Count is " << bpp->ridCount << endl; switch (colType.colWidth) { + case 16: + { + memcpy(values, outMsg + 1, outMsg->NVALS << 3); + cout << " CC: first value is " << values[0] << endl; + break; + } + + case 8: { memcpy(values, outMsg + 1, outMsg->NVALS << 3); @@ -459,6 +492,9 @@ void ColumnCommand::createCommand(ByteStream& bs) bs >> BOP; bs >> filterCount; deserializeInlineVector(bs, lastLbid); + +// cout << __func__ << " colType.colWidth " << colType.colWidth << endl; + // cout << "lastLbid count=" << lastLbid.size() << endl; // for (uint32_t i = 0; i < lastLbid.size(); i++) // cout << " " << lastLbid[i]; @@ -488,7 +524,7 @@ void ColumnCommand::resetCommand(ByteStream& bs) void ColumnCommand::prep(int8_t outputType, bool absRids) { /* make the template NewColRequestHeader */ - + baseMsgLength = sizeof(NewColRequestHeader) + (suppressFilter ? 0 : filterString.length()); @@ -554,7 +590,12 @@ void ColumnCommand::prep(int8_t outputType, bool absRids) shift = 1; mask = 0x01; break; - + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix shift and mask for 16 Bytes ?"<< endl; + shift = 1; + mask = 0x01; + break; + default: cout << "CC: colWidth is " << colType.colWidth << endl; throw logic_error("ColumnCommand: bad column width?"); @@ -751,6 +792,9 @@ void ColumnCommand::projectResultRG(RowGroup& rg, uint32_t pos) break; } + + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; } } diff --git a/primitives/primproc/passthrucommand.cpp b/primitives/primproc/passthrucommand.cpp index 407c257dc..d33d7a6f0 100644 --- a/primitives/primproc/passthrucommand.cpp +++ b/primitives/primproc/passthrucommand.cpp @@ -77,6 +77,9 @@ void PassThruCommand::project() switch (colWidth) { + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; + case 8: bpp->serialized->append((uint8_t*) bpp->values, bpp->ridCount << 3); break; @@ -153,6 +156,21 @@ void PassThruCommand::projectIntoRowGroup(RowGroup& rg, uint32_t col) } break; + case 16: + cout << __FILE__ << ":" << __LINE__ << " PassThruCommand::projectIntoRowGroup" << " Addition for 16 Bytes" << endl; + for (i = 0; i < bpp->ridCount; i++) + { + cout << "PTC: " << "BIN16 " << i << " " + << hex + << *((int64_t*) bpp->values[i]) + << " " + << *(((int64_t*) bpp->values[i]) +1) + << endl; + // values[i] is 8 bytes so it contains the pointer to bpp->outputMsg set by ColumnCommand::process_OT_BOTH() + r.setBinaryField((uint8_t*)bpp->values[i], 16, offset); + + r.nextRow(rowSize); + } } } diff --git a/primitives/primproc/pseudocc.cpp b/primitives/primproc/pseudocc.cpp index 1ee33a04b..7550c1b47 100644 --- a/primitives/primproc/pseudocc.cpp +++ b/primitives/primproc/pseudocc.cpp @@ -91,7 +91,8 @@ void PseudoCC::loadData() case 8: loadPMNumber(); break; - + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; default: cout << "PC::loadData(): bad column width" << endl; break; @@ -143,7 +144,8 @@ void PseudoCC::loadData() case 8: loadSegmentNum(); break; - + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; default: cout << "PC::loadData(): bad column width" << endl; break; @@ -170,6 +172,8 @@ void PseudoCC::loadData() loadPartitionNum(); break; + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; default: cout << "PC::loadData(): bad column width" << endl; break; @@ -196,6 +200,8 @@ void PseudoCC::loadData() loadLBID(); break; + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; default: cout << "PC::loadData(): bad column width" << endl; break; @@ -221,7 +227,9 @@ void PseudoCC::loadData() case 8: loadDBRootNum(); break; - + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; + default: cout << "PC::loadData(): bad column width" << endl; break; @@ -250,7 +258,9 @@ void PseudoCC::loadData() case 8: loadSingleValue(valueFromUM); break; - + case 16: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; + default: cout << "PC::loadData(): bad column width" << endl; break; diff --git a/utils/common/nullvaluemanip.cpp b/utils/common/nullvaluemanip.cpp index 3b4255dea..63b0c8693 100644 --- a/utils/common/nullvaluemanip.cpp +++ b/utils/common/nullvaluemanip.cpp @@ -125,7 +125,10 @@ uint64_t getNullValue(CalpontSystemCatalog::ColDataType t, uint32_t colWidth) case CalpontSystemCatalog::UBIGINT: return joblist::UBIGINTNULL; - + + case CalpontSystemCatalog::BINARY: + return joblist::BINARYNULL; + case CalpontSystemCatalog::VARBINARY: default: ostringstream os; @@ -239,6 +242,9 @@ int64_t getSignedNullValue(CalpontSystemCatalog::ColDataType t, uint32_t colWidt case CalpontSystemCatalog::LONGDOUBLE: return (int64_t)joblist::LONGDOUBLENULL; + case CalpontSystemCatalog::BINARY: + return (int64_t)joblist::BINARYNULL; + case CalpontSystemCatalog::VARBINARY: default: ostringstream os; diff --git a/utils/dataconvert/dataconvert.cpp b/utils/dataconvert/dataconvert.cpp index d920f5ab3..f89c73d16 100644 --- a/utils/dataconvert/dataconvert.cpp +++ b/utils/dataconvert/dataconvert.cpp @@ -1531,6 +1531,10 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType, value = data; break; + case CalpontSystemCatalog::BINARY: + value = data; + break; + default: throw QueryDataExcept("convertColumnData: unknown column data type.", dataTypeErr); break; @@ -1727,6 +1731,12 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType, } break; + case CalpontSystemCatalog::BINARY: + { + value = data; + } + break; + case CalpontSystemCatalog::UTINYINT: { uint8_t utinyintvalue = joblist::UTINYINTNULL; diff --git a/utils/funcexp/func_hex.cpp b/utils/funcexp/func_hex.cpp index 42e6b04d2..97e608981 100644 --- a/utils/funcexp/func_hex.cpp +++ b/utils/funcexp/func_hex.cpp @@ -130,6 +130,15 @@ string Func_hex::getStrVal(rowgroup::Row& row, return string(hexPtr.get(), hexLen); } + case CalpontSystemCatalog::BINARY: + { + const string& arg = parm[0]->data()->getStrVal(row, isNull); + uint64_t hexLen = arg.size() * 2; + scoped_array hexPtr(new char[hexLen + 1]); // "+ 1" for the last \0 + octet2hex(hexPtr.get(), arg.data(), arg.size()); + return string(hexPtr.get(), hexLen); + } + default: { dec = (uint64_t)parm[0]->data()->getIntVal(row, isNull); diff --git a/utils/rowgroup/rowgroup.cpp b/utils/rowgroup/rowgroup.cpp index 9f16123cf..df10ed108 100644 --- a/utils/rowgroup/rowgroup.cpp +++ b/utils/rowgroup/rowgroup.cpp @@ -628,7 +628,8 @@ string Row::toString() const os << " " << dec; break; } - + case CalpontSystemCatalog::BINARY: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; default: os << getIntField(i) << " "; break; @@ -690,7 +691,8 @@ string Row::toCSV() const os << dec; break; } - + case CalpontSystemCatalog::BINARY: + std::cout << __FILE__<< __LINE__ << ":" << "toCSV"<< std::endl; default: os << getIntField(i); break; @@ -852,7 +854,8 @@ void Row::initToNull() case CalpontSystemCatalog::UBIGINT: *((uint64_t*) &data[offsets[i]]) = joblist::UBIGINTNULL; break; - + case CalpontSystemCatalog::BINARY: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; default: ostringstream os; os << "Row::initToNull(): got bad column type (" << types[i] << @@ -934,7 +937,8 @@ bool Row::isNullValue(uint32_t colIndex) const case 8: return (*((uint64_t*) &data[offsets[colIndex]]) == joblist::CHAR8NULL); - + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; default: return (*((uint64_t*) &data[offsets[colIndex]]) == *((uint64_t*) joblist::CPNULLSTRMARK.c_str())); } @@ -1004,6 +1008,16 @@ bool Row::isNullValue(uint32_t colIndex) const return (*((long double*) &data[offsets[colIndex]]) == joblist::LONGDOUBLENULL); break; + case CalpontSystemCatalog::BINARY: + { + // When is null? I dont know. Wait for bitmap null empty implemtenttion ? + // Also still pendig rework discussed use pointers for empty null values + + std::cout << __FILE__<< ":" << __LINE__ << " isNullValue value " << (*((uint64_t*) &data[offsets[colIndex]])) << std::endl; + //return false; + return (*((uint64_t*) &data[offsets[colIndex]]) == joblist::BINARYEMPTYROW); + } + default: { ostringstream os; @@ -1624,7 +1638,8 @@ void RowGroup::addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList& case 8: cr->PutData(row.getUintField<8>(j)); break; - + case 16: + default: { string s = row.getStringField(j); @@ -1645,6 +1660,8 @@ void RowGroup::addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList& cr->PutData(row.getUintField<4>(j)); break; + case CalpontSystemCatalog::BINARY: + std::cout << __FILE__<< __LINE__ << __func__<< std::endl; default: cr->PutData(row.getIntField<8>(j)); } diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index e3b3652e9..c1789dc18 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -28,6 +28,7 @@ #ifndef ROWGROUP_H_ #define ROWGROUP_H_ +#include #include #include #include @@ -421,7 +422,7 @@ public: inline uint32_t getStringLength(uint32_t colIndex) const; void setStringField(const std::string& val, uint32_t colIndex); inline void setStringField(const uint8_t*, uint32_t len, uint32_t colIndex); - + inline void setBinaryField(const uint8_t* strdata, uint32_t length, uint32_t offset); // support VARBINARY // Add 2-byte length at the CHARSET_INFO*beginning of the field. NULL and zero length field are // treated the same, could use one of the length bit to distinguish these two cases. @@ -433,6 +434,8 @@ public: inline const uint8_t* getVarBinaryField(uint32_t& len, uint32_t colIndex) const; inline void setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex); + inline std::string getBinaryField(uint32_t colIndex) const; + inline boost::shared_ptr getUserData(uint32_t colIndex) const; inline void setUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr userData, @@ -664,7 +667,8 @@ inline bool Row::equals(uint64_t val, uint32_t colIndex) const case 8: return *((uint64_t*) &data[offsets[colIndex]]) == val; - + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; default: idbassert(0); throw std::logic_error("Row::equals(): bad length."); @@ -692,7 +696,8 @@ inline uint64_t Row::getUintField(uint32_t colIndex) const case 8: return *((uint64_t*) &data[offsets[colIndex]]); - + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; default: idbassert(0); throw std::logic_error("Row::getUintField(): bad length."); @@ -711,7 +716,8 @@ inline uint64_t Row::getUintField(uint32_t colIndex) const case 4: return *((uint32_t*) &data[offsets[colIndex]]); - + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; case 8: return *((uint64_t*) &data[offsets[colIndex]]); @@ -784,6 +790,12 @@ inline uint32_t Row::getStringLength(uint32_t colIndex) const return strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex)); } + +inline void Row::setBinaryField(const uint8_t* strdata, uint32_t length, uint32_t offset) +{ + memcpy(&data[offset], strdata, length); +} + inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_t colIndex) { uint64_t offset; @@ -817,6 +829,11 @@ inline std::string Row::getStringField(uint32_t colIndex) const strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex))); } +inline std::string Row::getBinaryField(uint32_t colIndex) const +{ + return std::string((char*) &data[offsets[colIndex]], getColumnWidth(colIndex)); +} + inline std::string Row::getVarBinaryStringField(uint32_t colIndex) const { if (inStringTable(colIndex)) @@ -936,7 +953,10 @@ inline void Row::setUintField_offset(uint64_t val, uint32_t offset) case 8: *((uint64_t*) &data[offset]) = val; break; - + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; + *((uint64_t*) &data[offset]) = val; + break; default: idbassert(0); throw std::logic_error("Row::setUintField called on a non-uint32_t field"); @@ -974,7 +994,10 @@ inline void Row::setUintField(uint64_t val, uint32_t colIndex) case 8: *((uint64_t*) &data[offsets[colIndex]]) = val; break; - + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; + *((uint64_t*) &data[offsets[colIndex]]) = val; + break; default: idbassert(0); throw std::logic_error("Row::setUintField called on a non-uint32_t field"); @@ -1000,7 +1023,9 @@ inline void Row::setUintField(uint64_t val, uint32_t colIndex) case 8: *((uint64_t*) &data[offsets[colIndex]]) = val; break; - + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; + *((uint64_t*) &data[offsets[colIndex]]) = val; default: idbassert(0); throw std::logic_error("Row::setUintField: bad length"); @@ -1027,7 +1052,8 @@ inline void Row::setIntField(int64_t val, uint32_t colIndex) case 8: *((int64_t*) &data[offsets[colIndex]]) = val; break; - + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; default: idbassert(0); throw std::logic_error("Row::setIntField: bad length"); @@ -1053,7 +1079,8 @@ inline void Row::setIntField(int64_t val, uint32_t colIndex) case 8: *((int64_t*) &data[offsets[colIndex]]) = val; break; - + case 16: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; default: idbassert(0); throw std::logic_error("Row::setIntField: bad length"); diff --git a/utils/thrift/thrift/protocol/TCompactProtocol.tcc b/utils/thrift/thrift/protocol/TCompactProtocol.tcc index 62d6485d7..89c943a8c 100644 --- a/utils/thrift/thrift/protocol/TCompactProtocol.tcc +++ b/utils/thrift/thrift/protocol/TCompactProtocol.tcc @@ -808,6 +808,8 @@ TType TCompactProtocolT::getTType(int8_t type) { case detail::compact::CT_STRUCT: return T_STRUCT; default: + + cout << __FILE__<< __LINE__ << __func__<< endl; throw TException(std::string("don't know what type: ") + (char)type); } return T_STOP; diff --git a/utils/windowfunction/wf_count.cpp b/utils/windowfunction/wf_count.cpp index 1e18c6a74..7af4acb0a 100644 --- a/utils/windowfunction/wf_count.cpp +++ b/utils/windowfunction/wf_count.cpp @@ -19,6 +19,7 @@ //#define NDEBUG +#include #include #include #include @@ -68,7 +69,8 @@ boost::shared_ptr WF_count::makeFunction(int id, const st break; } - default: + case CalpontSystemCatalog::BINARY: + std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl; { func.reset(new WF_count(id, name)); break; diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index 2b8d7a4a9..c1dc5a908 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -19,6 +19,7 @@ //#define NDEBUG +#include #include #include #include @@ -490,7 +491,8 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) datum.columnData = valIn; break; } - + case CalpontSystemCatalog::BINARY: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; default: { string errStr = "(" + colType2String[(int)datum.dataType] + ")"; @@ -754,7 +756,8 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, setValue(colDataType, b, e, c, &strOut); } break; - + case CalpontSystemCatalog::BINARY: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; default: { std::ostringstream errmsg; @@ -1101,7 +1104,8 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) datum.columnData = valIn; break; } - + case CalpontSystemCatalog::BINARY: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; default: { string errStr = "(" + colType2String[(int)datum.dataType] + ")"; diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index cea636402..600f5784f 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -19,6 +19,7 @@ */ //#define NDEBUG +#include #include #include #include @@ -741,7 +742,8 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) case CalpontSystemCatalog::LONGDOUBLE: v = &longDoubleNull; break; - + case CalpontSystemCatalog::BINARY: + cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl; case CalpontSystemCatalog::VARBINARY: default: std::ostringstream oss; diff --git a/writeengine/server/we_ddlcommandproc.cpp b/writeengine/server/we_ddlcommandproc.cpp index 0b557a2c4..fd5fe2a5d 100644 --- a/writeengine/server/we_ddlcommandproc.cpp +++ b/writeengine/server/we_ddlcommandproc.cpp @@ -506,6 +506,16 @@ uint8_t WE_DDLCommandProc::writeCreateSyscolumn(ByteStream& bs, std::string& err throw std::runtime_error(os.str()); } + else if (dataType == CalpontSystemCatalog::BINARY + && ! (colDefPtr->fType->fLength == 16 + || colDefPtr->fType->fLength == 32)) + { + ostringstream os; + os << "binary length may not be other than 16 or 32"; + throw std::runtime_error(os.str()); + } + + unsigned int i = 0; column_iterator = columns.begin(); diff --git a/writeengine/server/we_ddlcommon.h b/writeengine/server/we_ddlcommon.h index 9fcc06f7b..d477880f1 100644 --- a/writeengine/server/we_ddlcommon.h +++ b/writeengine/server/we_ddlcommon.h @@ -495,6 +495,10 @@ inline int convertDataType(int dataType) case ddlpackage::DDL_UNSIGNED_DOUBLE: calpontDataType = execplan::CalpontSystemCatalog::UDOUBLE; break; + + case ddlpackage::DDL_BINARY: + calpontDataType = execplan::CalpontSystemCatalog::BINARY; + break; default: throw runtime_error("Unsupported datatype!"); diff --git a/writeengine/shared/shared_components_tests.cpp b/writeengine/shared/shared_components_tests.cpp index 682bf1251..db2f9d48c 100644 --- a/writeengine/shared/shared_components_tests.cpp +++ b/writeengine/shared/shared_components_tests.cpp @@ -91,7 +91,8 @@ CPPUNIT_TEST(setUp); // Extent & dict related testing CPPUNIT_TEST( testExtensionWOPrealloc ); CPPUNIT_TEST( testDictExtensionWOPrealloc ); -// Semaphore related testing + CPPUNIT_TEST( testExtentCrWOPreallocBin ); + // Semaphore related testing // CPPUNIT_TEST( testSem ); // Log related testing @@ -1542,7 +1543,180 @@ public: } */ + template struct binary; + typedef binary<16> binary16; + typedef binary<32> binary32; + template + struct binary { + unsigned char data[W]; // May be ok for empty value ? + void operator=(uint64_t v) {*((uint64_t *) data) = v; memset(data + 8, 0, W - 8);} + inline uint8_t& operator[](const int index) {return *((uint8_t*) (data + index));} + inline uint64_t& uint64(const int index) {return *((uint64_t*) (data + (index << 3)));} + }; + + void testExtentCrWOPreallocBin() { + IDBDataFile* pFile = NULL; + ColumnOpCompress1 fileOp; + BlockOp blockOp; + char fileName[20]; + int rc; + char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ]; + int dbRoot = 1; + idbdatafile::IDBPolicy::init(true, false, "", 0); + // Set to versionbuffer to satisfy IDBPolicy::getType + strcpy(fileName, "versionbuffer"); + fileOp.compressionType(1); + + fileOp.deleteFile(fileName); + CPPUNIT_ASSERT(fileOp.exists(fileName) == false); + + //binary16 emptyVal = blockOp.getEmptyBinRowValue( execplan::CalpontSystemCatalog::BINARY, 16 ); + uint64_t emptyVal = blockOp.getEmptyRowValue(execplan::CalpontSystemCatalog::BIGINT, 8); + int width = blockOp.getCorrectRowWidth(execplan::CalpontSystemCatalog::BINARY, sizeof (binary16)); + int nBlocks = INITIAL_EXTENT_ROWS_TO_DISK / BYTE_PER_BLOCK * width; + + // createFile runs IDBDataFile::open + initAbrevCompColumnExtent + // under the hood + // bigint column file + rc = fileOp.createFile(fileName, + nBlocks, // number of blocks + emptyVal, // NULL value + width, // width + dbRoot); // dbroot + CPPUNIT_ASSERT(rc == NO_ERROR); + + fileOp.closeFile(pFile); + + // open created compressed file and check its header + pFile = IDBDataFile::open(IDBPolicy::getType(fileName, + IDBPolicy::WRITEENG), fileName, "rb", dbRoot); + + rc = pFile->seek(0, 0); + CPPUNIT_ASSERT(rc == NO_ERROR); + rc = fileOp.readHeaders(pFile, hdrs); + CPPUNIT_ASSERT(rc == NO_ERROR); + // Couldn't use IDBDataFile->close() here w/o excplicit cast + fileOp.closeFile(pFile); + + // Extend the extent up to 64MB + pFile = IDBDataFile::open(IDBPolicy::getType(fileName, + IDBPolicy::WRITEENG), fileName, "rb", dbRoot); + + // disable disk space preallocation + idbdatafile::IDBPolicy::setPreallocSpace(dbRoot); + rc = fileOp.initColumnExtent(pFile, + dbRoot, + BYTE_PER_BLOCK - nBlocks, // number of blocks + emptyVal, + width, + false, // use existing file + false, // don't expand; new extent + false, // add full (not abbreviated) extent + true); // optimize extention + + CPPUNIT_ASSERT(rc == NO_ERROR); + fileOp.closeFile(pFile); + // file has been extended + cout << endl << "file has been extended"; + + // write up to INITIAL_EXTENT_ROWS_TO_DISK + 1 rows into the file + + Column curCol; + binary16 valArray[INITIAL_EXTENT_ROWS_TO_DISK + 1]; + RID rowIdArray[INITIAL_EXTENT_ROWS_TO_DISK + 1]; + // This is the magic for the stub in FileOp::oid2FileName + int fid = 42; + + for (uint64_t it = 0; it <= INITIAL_EXTENT_ROWS_TO_DISK; it++) { + rowIdArray[it] = it; + valArray[it].uint64(0) = it + 3; + valArray[it].uint64(1) = it + 5; + } + + fileOp.initColumn(curCol); + fileOp.setColParam(curCol, + 1, // column number + width, + execplan::CalpontSystemCatalog::BINARY, + WriteEngine::WR_BINARY, + fid, + 1); //compression type + + string segFile; + // openColumnFile uses DBRM's oid server but we + // have to get the chunks' pointers from the header. + curCol.dataFile.pFile = fileOp.openFile( + curCol, + dbRoot, + 0, + 0, + segFile, + false, + "r+b", + BYTE_PER_BLOCK * BYTE_PER_BLOCK); // buffer size is 64MB + + CPPUNIT_ASSERT(rc == NO_ERROR); + + rc = fileOp.writeRow(curCol, INITIAL_EXTENT_ROWS_TO_DISK + 1, + (RID*) rowIdArray, valArray); + CPPUNIT_ASSERT_EQUAL(NO_ERROR, rc); // I prefer this way as it prints values + + // flush and close the file used for reading + fileOp.clearColumn(curCol); + + std::map oids; + oids[fid] = fid; + + // flush changed chunks from the Manager + int rtn1 = fileOp.chunkManager()->flushChunks(rc, oids); + + // read back the file + cout << endl << "Read file "; + DataBlock block; + binary16* bin16 = (binary16*) block.data; + + fileOp.initColumn(curCol); + fileOp.setColParam(curCol, + 1, // column number + width, + execplan::CalpontSystemCatalog::BINARY, + WriteEngine::WR_BINARY, + fid, + 1); //compression type + + curCol.dataFile.pFile = fileOp.openFile( + curCol, + dbRoot, + 0, + 0, + segFile, + false, + "r+b", + BYTE_PER_BLOCK * BYTE_PER_BLOCK); // buffer size is 64MB + + CPPUNIT_ASSERT_EQUAL(NO_ERROR, rc); + + int blocks = fileOp.blocksInFile(curCol.dataFile.pFile); + + for (int b = 0; b < blocks; b++) { + rc = fileOp.chunkManager()->readBlock(curCol.dataFile.pFile, block.data, b); // ColumnOpCompress1.readBlock() is protected so ... + CPPUNIT_ASSERT_EQUAL(NO_ERROR, rc); + //cout << endl << bin16[0].uint64(0); + CPPUNIT_ASSERT_EQUAL(b * 512UL + 3, bin16[0].uint64(0)); // Checking just first value of each block as it was written before + CPPUNIT_ASSERT_EQUAL(b * 512UL + 5, bin16[0].uint64(1)); + } + + fileOp.clearColumn(curCol); + fileOp.closeFile(curCol.dataFile.pFile); // Seems done by clearColumn, but anyways... + + cout << endl << "Delete file "; + + fileOp.deleteFile(fileName); + CPPUNIT_ASSERT(fileOp.exists(fileName) == false); + cout << endl << "End of test"; + } + void testCleanup() { // shutdown diff --git a/writeengine/shared/we_blockop.cpp b/writeengine/shared/we_blockop.cpp index fb8dfe89b..da104ad9b 100644 --- a/writeengine/shared/we_blockop.cpp +++ b/writeengine/shared/we_blockop.cpp @@ -159,6 +159,10 @@ uint64_t BlockOp::getEmptyRowValue( case CalpontSystemCatalog::UBIGINT : emptyVal = joblist::UBIGINTEMPTYROW; break; + + case CalpontSystemCatalog::BINARY : + emptyVal = joblist::BINARYEMPTYROW; + break; case CalpontSystemCatalog::CHAR : case CalpontSystemCatalog::VARCHAR : @@ -267,9 +271,11 @@ void BlockOp::setEmptyBuf( // Optimize buffer initialization by constructing and copying in an array // instead of individual values. This reduces the number of calls to // memcpy(). - for (int j = 0; j < ARRAY_COUNT; j++) + + int w = width > 8 ? 8: width; + for(uint8_t* pos = emptyValArray, * end = pos + NBYTES_IN_ARRAY; pos < end; pos += w) //FIXME for no loop { - memcpy(emptyValArray + (j * width), &emptyVal, width); + memcpy(pos, &emptyVal, w); } int countFull128 = (bufSize / width) / ARRAY_COUNT; diff --git a/writeengine/shared/we_convertor.cpp b/writeengine/shared/we_convertor.cpp index c550c1050..2af0df827 100644 --- a/writeengine/shared/we_convertor.cpp +++ b/writeengine/shared/we_convertor.cpp @@ -434,6 +434,11 @@ void Convertor::convertColType(CalpontSystemCatalog::ColDataType dataType, case CalpontSystemCatalog::UBIGINT: internalType = WriteEngine::WR_ULONGLONG; break; + + // Map BINARY to WR_BINARY + case CalpontSystemCatalog::BINARY: + internalType = WriteEngine::WR_BINARY; + break; default: internalType = WriteEngine::WR_CHAR; @@ -682,6 +687,11 @@ void Convertor::convertColType(ColStruct* curStruct) case CalpontSystemCatalog::UBIGINT: *internalType = WriteEngine::WR_ULONGLONG; break; + + // Map BINARY to WR_BINARY + case CalpontSystemCatalog::BINARY: + *internalType = WriteEngine::WR_BINARY; + break; default: *internalType = WriteEngine::WR_CHAR; @@ -772,7 +782,11 @@ int Convertor::getCorrectRowWidth(CalpontSystemCatalog::ColDataType dataType, in case CalpontSystemCatalog::TIMESTAMP: newWidth = 8; break; - + + case CalpontSystemCatalog::BINARY: + newWidth = width; + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::VARBINARY: // treat same as varchar for now diff --git a/writeengine/shared/we_type.h b/writeengine/shared/we_type.h index 71581b44b..6732b48b1 100644 --- a/writeengine/shared/we_type.h +++ b/writeengine/shared/we_type.h @@ -108,9 +108,10 @@ enum ColType /** @brief Column type enumeration*/ WR_USHORT = 14, /** @brief Unsigned Short */ WR_UINT = 15, /** @brief Unsigned Int */ WR_ULONGLONG = 16, /** @brief Unsigned Long long*/ - WR_TEXT = 17, /** @brief TEXT */ + WR_TEXT = 17, /** @brief TEXT */ WR_MEDINT = 18, /** @brief Medium Int */ - WR_UMEDINT = 19 /** @brief Unsigned Medium Int */ + WR_UMEDINT = 19, /** @brief Unsigned Medium Int */ + WR_BINARY = 20 /** @brief BINARY */ }; // Describes relation of field to column for a bulk load diff --git a/writeengine/wrapper/we_colop.cpp b/writeengine/wrapper/we_colop.cpp index 69e2d6f58..6e4e8c91f 100644 --- a/writeengine/wrapper/we_colop.cpp +++ b/writeengine/wrapper/we_colop.cpp @@ -123,7 +123,9 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, newFile = false; Column newCol; unsigned char buf[BYTE_PER_BLOCK]; - + unsigned char* curVal; + int64_t emptyVal = getEmptyRowValue(column.colDataType, column.colWidth); // Seems is ok have it here and just once + if (useStartingExtent) { // ZZ. For insert select, skip the hwm block and start inserting from the next block @@ -137,10 +139,10 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, if ( rc != NO_ERROR) return rc; - - for (j = 0; j < totalRowPerBlock; j++) + + for (j = 0, curVal = buf; j < totalRowPerBlock; j++, curVal += column.colWidth) { - if (isEmptyRow(buf, j, column)) + if (isEmptyRow((uint64_t*)curVal, emptyVal, column.colWidth)) { rowIdArray[counter] = getRowId(hwm, column.colWidth, j); rowsallocated++; @@ -192,9 +194,9 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, } } - for (j = 0; j < totalRowPerBlock; j++) + for (j = 0, curVal = buf; j < totalRowPerBlock; j++, curVal += column.colWidth) { - if (isEmptyRow(buf, j, column)) + if (isEmptyRow((uint64_t*)curVal, emptyVal, column.colWidth)) { rowIdArray[counter] = getRowId(hwm, column.colWidth, j); rowsallocated++; @@ -492,9 +494,9 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, } } - for (j = 0; j < totalRowPerBlock; j++) + for (j = 0, curVal = buf; j < totalRowPerBlock; j++, curVal += column.colWidth) { - if (isEmptyRow(buf, j, column)) + if (isEmptyRow((uint64_t*)curVal, emptyVal, column.colWidth)) // Why to check it if beacause line 483 is always true ? { rowIdArray[counter] = getRowId(newHwm, column.colWidth, j); rowsallocated++; @@ -533,9 +535,9 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, } } - for (j = 0; j < totalRowPerBlock; j++) + for (j = 0, curVal = buf; j < totalRowPerBlock; j++, curVal += column.colWidth) { - if (isEmptyRow(buf, j, column)) + if (isEmptyRow((uint64_t*)curVal, emptyVal, column.colWidth)) { rowIdArray[counter] = getRowId(newHwm, newCol.colWidth, j); rowsallocated++; @@ -1064,7 +1066,7 @@ int ColumnOp::fillColumn(const TxnID& txnid, Column& column, Column& refCol, voi startColFbo++; colBufOffset = 0; } - + while (((refBufOffset + refCol.colWidth) <= BYTE_PER_BLOCK) && ((colBufOffset + column.colWidth) <= BYTE_PER_BLOCK)) { @@ -1080,7 +1082,8 @@ int ColumnOp::fillColumn(const TxnID& txnid, Column& column, Column& refCol, voi } else if (column.compressionType != 0) //@Bug 3866, fill the empty row value for compressed chunk { - memcpy(colBuf + colBufOffset, &emptyVal, column.colWidth); + for(int b = 0, w = column.colWidth; b < column.colWidth; b += 8, w = 8) //FIXME for no loop! + memcpy(colBuf + colBufOffset + b, &emptyVal, w); dirty = true; } @@ -1405,18 +1408,39 @@ void ColumnOp::initColumn(Column& column) const * RETURN: * true if success, false otherwise ***********************************************************/ -bool ColumnOp::isEmptyRow(unsigned char* buf, int offset, const Column& column) + +// It is called at just 4 places on allocRowId() but all the time inside extend scanning loops +inline bool ColumnOp::isEmptyRow(uint64_t* curVal, uint64_t emptyVal, const int colWidth) { - bool emptyFlag = true; - uint64_t curVal, emptyVal; + //Calling it here makes calling it "i" times from the calling loop at allocRowId() + //uint64_t emptyVal = getEmptyRowValue(column.colDataType, column.colWidth); + + // No need for it if change param type.. just been lazy to add extra castings + //uint64_t &emptyVal = column.emptyVal; + + //no need to multiply over and over if just increment the pointer on the caller + //uint64_t *curVal = (uint64_t*)(buf + offset * column.colWidth); - memcpy(&curVal, buf + offset * column.colWidth, column.colWidth); - emptyVal = getEmptyRowValue(column.colDataType, column.colWidth); + switch(colWidth){ + case 1: + return *(uint8_t*)curVal == emptyVal; - if (/*curVal != emptyVal*/memcmp(&curVal, &emptyVal, column.colWidth)) - emptyFlag = false; - - return emptyFlag; + case 2: + return *(uint16_t*)curVal == emptyVal; + + case 4: + return *(uint32_t*)curVal == emptyVal; + + case 8: + return *curVal == emptyVal; + + case 16: + return ((curVal[0] == emptyVal) && (curVal[1] == emptyVal)); + + case 32: + return ((curVal[0] == emptyVal) && (curVal[1] == emptyVal) + && (curVal[2] == emptyVal) && (curVal[3] == emptyVal)); + } } /*********************************************************** @@ -1534,7 +1558,7 @@ void ColumnOp::setColParam(Column& column, column.colWidth = colWidth; column.colType = colType; column.colDataType = colDataType; - + column.dataFile.fid = dataFid; column.dataFile.fDbRoot = dbRoot; column.dataFile.fPartition = partition; @@ -1662,6 +1686,12 @@ int ColumnOp::writeRow(Column& curCol, uint64_t totalRow, const RID* rowIdArray, if (!bDelete) pVal = &((uint64_t*) valArray)[i]; break; + case WriteEngine::WR_BINARY: + if (!bDelete) pVal = (uint8_t*) valArray + i * curCol.colWidth; + + //pOldVal = (uint8_t*) oldValArray + i * curCol.colWidth; + break; + default : if (!bDelete) pVal = &((int*) valArray)[i]; break; @@ -1669,7 +1699,7 @@ int ColumnOp::writeRow(Column& curCol, uint64_t totalRow, const RID* rowIdArray, if (bDelete) { - emptyVal = getEmptyRowValue(curCol.colDataType, curCol.colWidth); + emptyVal = getEmptyRowValue(curCol.colDataType, curCol.colWidth); pVal = &emptyVal; } @@ -1852,7 +1882,8 @@ int ColumnOp::writeRows(Column& curCol, uint64_t totalRow, const RIDList& ridLis } // This is the write stuff - writeBufValue(dataBuf + dataBio, pVal, curCol.colWidth); + for(int b = 0, w = curCol.colWidth > 8 ? 8 : curCol.colWidth; b < curCol.colWidth; b += 8) //FIXME for no loop + writeBufValue(dataBuf + dataBio + b, pVal, w); i++; diff --git a/writeengine/wrapper/we_colop.h b/writeengine/wrapper/we_colop.h index d1947d752..ad485a8e7 100644 --- a/writeengine/wrapper/we_colop.h +++ b/writeengine/wrapper/we_colop.h @@ -220,7 +220,7 @@ public: /** * @brief Check whether it is an empty row */ - EXPORT virtual bool isEmptyRow(unsigned char* buf, int offset, const Column& column); + EXPORT virtual bool isEmptyRow(uint64_t* curVal, uint64_t emptyVal, const int colWidth); /** * @brief Check whether it is a valid column diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 62842da0d..93a5c1200 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -388,6 +388,15 @@ void WriteEngineWrapper::convertValue(const ColType colType, void* value, boost: memcpy(value, &val, size); } break; + + case WriteEngine::WR_BINARY: + { + char val = boost::any_cast(data); + //TODO:FIXME how to determine size ? 16, 32,48 ? + size = 16; + memcpy(value, &val, size); + } + break; } // end of switch (colType) } /*@convertValue - The base for converting values */ @@ -492,6 +501,12 @@ void WriteEngineWrapper::convertValue(const ColType colType, void* valArray, con case WriteEngine::WR_TOKEN: ((Token*)valArray)[pos] = boost::any_cast(data); break; + + case WriteEngine::WR_BINARY: + curStr = boost::any_cast(data); + memcpy((char*)valArray + pos * curStr.length(), curStr.c_str(), curStr.length()); + break; + } // end of switch (colType) } else @@ -557,6 +572,16 @@ void WriteEngineWrapper::convertValue(const ColType colType, void* valArray, con case WriteEngine::WR_TOKEN: data = ((Token*)valArray)[pos]; break; + + case WriteEngine::WR_BINARY : + { + char tmp[16]; + //TODO:FIXME how to determine size ? 16, 32,48 ? + memcpy(tmp, (char*)valArray + pos * 16, 16); + curStr = tmp; + data = curStr; + } + break; } // end of switch (colType) } // end of if } @@ -3234,7 +3259,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, } bool newFile; - + cout << "Datafile " << curCol.dataFile.fSegFileName << endl; #ifdef PROFILE timer.start("allocRowId"); #endif @@ -5130,6 +5155,10 @@ int WriteEngineWrapper::writeColumnRec(const TxnID& txnid, case WriteEngine::WR_TOKEN: valArray = (Token*) calloc(sizeof(Token), totalRow1); break; + + case WriteEngine::WR_BINARY: + valArray = calloc(colStructList[i].colWidth, totalRow1); + break; } // convert values to valArray @@ -5349,6 +5378,11 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, tmp16 = curValue; ((uint16_t*)valArray)[j] = tmp16; break; + + case WriteEngine::WR_BINARY: + ((uint64_t*)valArray)[j] = curValue; //FIXME maybe + break; + } } @@ -5492,6 +5526,10 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, tmp16 = curValue; ((uint16_t*)valArray)[j] = tmp16; break; + + case WriteEngine::WR_BINARY: + ((uint64_t*)valArray)[j] = curValue; // FIXME maybe + break; } } @@ -5775,6 +5813,9 @@ int WriteEngineWrapper::writeColumnRec(const TxnID& txnid, case WriteEngine::WR_TOKEN: valArray = (Token*) calloc(sizeof(Token), 1); break; + case WriteEngine::WR_BINARY: + valArray = (char*) calloc(sizeof(char), curColStruct.colWidth); //FIXME maybe + break; } // convert values to valArray