1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-641 Work of Ivan Zuniga on basic read and write support for Binary16

This commit is contained in:
Gagan Goel
2019-10-24 14:01:47 -04:00
committed by Roman Nozdrin
parent d943beb445
commit 32f6167067
48 changed files with 1114 additions and 75 deletions

View File

@ -188,6 +188,7 @@ LONGTEXT {return LONGTEXT;}
BOOL {return BOOL;}
BOOLEAN {return BOOLEAN;}
MEDIUMINT {return MEDIUMINT;}
BINARY {return BINARY;}
\n { lineno++;}

View File

@ -112,7 +112,7 @@ MIN_ROWS MODIFY NO NOT NULL_TOK NUMBER NUMERIC ON PARTIAL PRECISION PRIMARY
REFERENCES RENAME RESTRICT SET SMALLINT TABLE TEXT TINYBLOB TINYTEXT
TINYINT TO UNIQUE UNSIGNED UPDATE USER SESSION_USER SYSTEM_USER VARCHAR VARBINARY
VARYING WITH ZONE DOUBLE IDB_FLOAT REAL CHARSET COLLATE IDB_IF EXISTS CHANGE TRUNCATE
BOOL BOOLEAN MEDIUMINT TIMESTAMP
BOOL BOOLEAN MEDIUMINT TIMESTAMP BINARY
%token <str> DQ_IDENT IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE TIME
@ -131,6 +131,7 @@ BOOL BOOLEAN MEDIUMINT TIMESTAMP
%type <ata> ata_rename_table
%type <columnType> character_string_type
%type <columnType> binary_string_type
%type <columnType> fixed_binary_string_type
%type <columnType> blob_type
%type <columnType> text_type
%type <str> check_constraint_def
@ -747,6 +748,7 @@ opt_column_collate:
data_type:
character_string_type opt_column_charset opt_column_collate
| binary_string_type
| fixed_binary_string_type
| numeric_type
| datetime_type
| blob_type
@ -918,6 +920,14 @@ binary_string_type:
}
;
fixed_binary_string_type:
BINARY '(' ICONST ')'
{
$$ = new ColumnType(DDL_BINARY);
$$->fLength = atoi($3);
}
;
blob_type:
BLOB '(' ICONST ')'
{

View File

@ -238,6 +238,7 @@ enum DDL_DATATYPES
DDL_TEXT,
DDL_TIME,
DDL_TIMESTAMP,
DDL_BINARY,
DDL_INVALID_DATATYPE
};
@ -276,7 +277,8 @@ const std::string DDLDatatypeString[] =
"unsigned-numeric",
"text",
"time",
"timestamp"
"timestamp",
"binary",
""
};

View File

@ -247,6 +247,11 @@ bool typesAreSame(const CalpontSystemCatalog::ColType& colType, const ColumnType
break;
case (CalpontSystemCatalog::BINARY):
if (newType.fType == DDL_BINARY && colType.colWidth == newType.fLength) return true;
break;
default:
break;
}

View File

@ -252,6 +252,10 @@ execplan::CalpontSystemCatalog::ColDataType DDLPackageProcessor::convertDataType
colDataType = CalpontSystemCatalog::TEXT;
break;
case ddlpackage::DDL_BINARY:
colDataType = CalpontSystemCatalog::BINARY;
break;
default:
throw runtime_error("Unsupported datatype!");

View File

@ -226,6 +226,10 @@ const string colDataTypeToString(CalpontSystemCatalog::ColDataType cdt)
return "udouble";
break;
case CalpontSystemCatalog::BINARY:
return "binary";
break;
default:
break;
}

View File

@ -170,7 +170,8 @@ public:
NUM_OF_COL_DATA_TYPE, /* NEW TYPES ABOVE HERE */
LONGDOUBLE, /* @bug3241, dev and variance calculation only */
STRINT, /* @bug3532, string as int for fast comparison */
UNDEFINED /*!< Undefined - used in UDAF API */
UNDEFINED, /*!< Undefined - used in UDAF API */
BINARY, /*!< BINARY type */
};
/** the set of column constraint types
@ -1212,6 +1213,13 @@ inline bool isNull(int64_t val, const execplan::CalpontSystemCatalog::ColType& c
break;
}
case execplan::CalpontSystemCatalog::BINARY:
{
ret = false;
break;
}
default:
break;
}

View File

@ -126,6 +126,7 @@ bool PredicateOperator::operator!=(const TreeNode* t) const
}
//FIXME: VARBINARY???
//FIXME: BINARY???
void PredicateOperator::setOpType(Type& l, Type& r)
{
fOperationType = l; // Default to left side. Modify as needed.

View File

@ -690,6 +690,13 @@ void SimpleColumn::evaluate(Row& row, bool& isNull)
break;
}
case CalpontSystemCatalog::BINARY:
{
fResult.strVal = row.getBinaryField(fInputIndex);
break;
}
default: // treat as int64
{
fResult.intVal = row.getUintField<8>(fInputIndex);

View File

@ -25,6 +25,7 @@
#ifndef SIMPLECOLUMNDECIMAL_H
#define SIMPLECOLUMNDECIMAL_H
#include <iostream>
#include <string>
#include <cmath>
@ -141,7 +142,8 @@ void SimpleColumn_Decimal<len>::setNullVal()
case 1:
fNullVal = joblist::TINYINTNULL;
break;
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
default:
fNullVal = joblist::BIGINTNULL;
}
@ -223,6 +225,8 @@ void SimpleColumn_Decimal<len>::serialize(messageqcpp::ByteStream& b) const
case 8:
b << (ObjectReader::id_t) ObjectReader::SIMPLECOLUMN_DECIMAL8;
break;
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
}
SimpleColumn::serialize(b);
@ -248,6 +252,8 @@ void SimpleColumn_Decimal<len>::unserialize(messageqcpp::ByteStream& b)
case 8:
ObjectReader::checkType(b, ObjectReader::SIMPLECOLUMN_DECIMAL8);
break;
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << std::endl;
}
SimpleColumn::unserialize(b);

View File

@ -25,6 +25,7 @@
#ifndef SIMPLECOLUMNINT_H
#define SIMPLECOLUMNINT_H
#include <iostream>
#include <string>
#include "simplecolumn.h"
@ -241,6 +242,8 @@ void SimpleColumn_INT<len>::serialize(messageqcpp::ByteStream& b) const
case 8:
b << (ObjectReader::id_t) ObjectReader::SIMPLECOLUMN_INT8;
break;
case 16:
std::cout << __FILE__<< ":" << __LINE__ << " Fix for 16 Bytes ?" << std::endl;
}
SimpleColumn::serialize(b);
@ -266,6 +269,8 @@ void SimpleColumn_INT<len>::unserialize(messageqcpp::ByteStream& b)
case 8:
ObjectReader::checkType(b, ObjectReader::SIMPLECOLUMN_INT8);
break;
case 16:
std::cout << __FILE__<< ":" << __LINE__ << " Fix for 16 Bytes ?" << std::endl;
}
SimpleColumn::unserialize(b);

View File

@ -25,6 +25,7 @@
#ifndef SIMPLECOLUMNUINT_H
#define SIMPLECOLUMNUINT_H
#include <iostream>
#include <string>
#include "simplecolumn.h"
@ -140,7 +141,8 @@ void SimpleColumn_UINT<len>::setNullVal()
case 1:
fNullVal = joblist::UTINYINTNULL;
break;
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << std::endl;
default:
fNullVal = joblist::UBIGINTNULL;
}
@ -241,6 +243,8 @@ void SimpleColumn_UINT<len>::serialize(messageqcpp::ByteStream& b) const
case 8:
b << (ObjectReader::id_t) ObjectReader::SIMPLECOLUMN_UINT8;
break;
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << std::endl;
}
SimpleColumn::serialize(b);
@ -266,6 +270,8 @@ void SimpleColumn_UINT<len>::unserialize(messageqcpp::ByteStream& b)
case 8:
ObjectReader::checkType(b, ObjectReader::SIMPLECOLUMN_UINT8);
break;
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << std::endl;
}
SimpleColumn::unserialize(b);

View File

@ -219,6 +219,7 @@ const string SimpleFilter::data() const
fRhs->resultType().colDataType == CalpontSystemCatalog::DATE ||
fRhs->resultType().colDataType == CalpontSystemCatalog::DATETIME ||
fRhs->resultType().colDataType == CalpontSystemCatalog::TIMESTAMP ||
fRhs->resultType().colDataType == CalpontSystemCatalog::BINARY ||
fRhs->resultType().colDataType == CalpontSystemCatalog::TIME))
rhs = "'" + SimpleFilter::escapeString(fRhs->data()) + "'";
else
@ -233,6 +234,7 @@ const string SimpleFilter::data() const
fLhs->resultType().colDataType == CalpontSystemCatalog::DATE ||
fLhs->resultType().colDataType == CalpontSystemCatalog::TIME ||
fLhs->resultType().colDataType == CalpontSystemCatalog::TIMESTAMP ||
fLhs->resultType().colDataType == CalpontSystemCatalog::BINARY ||
fLhs->resultType().colDataType == CalpontSystemCatalog::DATETIME))
lhs = "'" + SimpleFilter::escapeString(fLhs->data()) + "'";
else

View File

@ -741,6 +741,10 @@ inline const std::string& TreeNode::getStrVal(const std::string& timeZone)
break;
}
case CalpontSystemCatalog::BINARY:
{
break;
}
default:
throw logging::InvalidConversionExcept("TreeNode::getStrVal: Invalid conversion.");
}
@ -1078,6 +1082,7 @@ inline IDB_Decimal TreeNode::getDecimalVal()
case CalpontSystemCatalog::VARBINARY:
case CalpontSystemCatalog::BLOB:
case CalpontSystemCatalog::BINARY:
throw logging::InvalidConversionExcept("TreeNode::getDecimalVal: non-support conversion from binary string");
case CalpontSystemCatalog::BIGINT:

View File

@ -478,7 +478,8 @@ void WindowFunctionColumn::evaluate(Row& row, bool& isNull)
fResult.origIntVal = row.getUintField<8>(fInputIndex);
break;
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << endl;
default:
if (row.equals(CPNULLSTRMARK, fInputIndex))
isNull = true;

View File

@ -667,7 +667,8 @@ void BatchPrimitiveProcessorJL::getTuples(messageqcpp::ByteStream& in,
columnData[j]++;
pos++;
break;
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << endl;
default:
cout << "BPP::getTuples(): bad column width of " << colWidths[j]
<< endl;

View File

@ -269,7 +269,8 @@ uint8_t ColumnCommandJL::getTableColumnType()
case 1:
return TableColumn::UINT8;
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix 16 Bytes ?" << endl;
default:
throw logic_error("ColumnCommandJL: bad column width");
}

View File

@ -332,6 +332,8 @@ string extractTableAlias(const SSC& sc)
//------------------------------------------------------------------------------
CalpontSystemCatalog::OID isDictCol(const CalpontSystemCatalog::ColType& colType)
{
if (colType.colDataType == CalpontSystemCatalog::BINARY) return 0;
if (colType.colWidth > 8) return colType.ddn.dictOID;
if (colType.colDataType == CalpontSystemCatalog::VARCHAR &&

View File

@ -83,6 +83,9 @@ const uint16_t NULL_UINT16 = USMALLINTNULL;
const uint32_t NULL_UINT32 = UINTNULL;
const uint64_t NULL_UINT64 = UBIGINTNULL;
const uint64_t BINARYEMPTYROW = 0;
const uint64_t BINARYNULL = 0;
const std::string CPNULLSTRMARK("_CpNuLl_");
const std::string CPSTRNOTFOUND("_CpNoTf_");

View File

@ -699,6 +699,8 @@ bool LBIDList::CasualPartitionPredicate(const int64_t Min,
uint64_t val = *(int64_t*)MsgDataPtr;
value = static_cast<int64_t>(val);
}
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
}
}
else
@ -731,6 +733,8 @@ bool LBIDList::CasualPartitionPredicate(const int64_t Min,
int64_t val = *(int64_t*)MsgDataPtr;
value = val;
}
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
}
}

View File

@ -74,6 +74,11 @@ PassThruCommandJL::PassThruCommandJL(const PassThruStep& p)
tableColumnType = TableColumn::UINT64;
break;
case 16:
case 32:
tableColumnType = TableColumn::STRING;
break;
default:
throw logic_error("PassThruCommandJL(): bad column width?");
}

View File

@ -178,7 +178,7 @@ pColScanStep::pColScanStep(
fColType.colWidth = 8;
fIsDict = true;
}
else if (fColType.colWidth > 8 )
else if (fColType.colWidth > 8 && fColType.colDataType != CalpontSystemCatalog::BINARY)
{
fColType.colWidth = 8;
fIsDict = true;

View File

@ -177,7 +177,7 @@ pColStep::pColStep(
fColType.colWidth = 8;
fIsDict = true;
}
else if (fColType.colWidth > 8 )
else if (fColType.colWidth > 8 && fColType.colDataType != CalpontSystemCatalog::BINARY )
{
fColType.colWidth = 8;
fIsDict = true;

View File

@ -252,6 +252,10 @@ uint32_t convertDataType(int dataType)
calpontDataType = CalpontSystemCatalog::UDOUBLE;
break;
case ddlpackage::DDL_BINARY:
calpontDataType = CalpontSystemCatalog::BINARY;
break;
default:
throw runtime_error("Unsupported datatype!");

View File

@ -817,7 +817,16 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h
break;
}
case CalpontSystemCatalog::BINARY:
{
Field_varstring* f2 = (Field_varstring*)*f;
f2->store(row.getBinaryField(s).c_str(), 16, f2->charset());
if ((*f)->null_ptr)
*(*f)->null_ptr &= ~(*f)->null_bit;
break;
}
default: // treat as int64
{
intColVal = row.getUintField<8>(s);

View File

@ -273,6 +273,21 @@ inline bool colStrCompare_(uint64_t val1, uint64_t val2, uint8_t COP, const idb_
template<int>
inline bool isEmptyVal(uint8_t type, const uint8_t* val8);
template<>
inline bool isEmptyVal<32>(uint8_t type, const uint8_t* ival) // For BINARY
{
const uint64_t* val = reinterpret_cast<const uint64_t*>(ival);
return ((val[0] == joblist::BINARYEMPTYROW) && (val[1] == joblist::BINARYEMPTYROW)
&& (val[2] == joblist::BINARYEMPTYROW) && (val[3] == joblist::BINARYEMPTYROW));
}
template<>
inline bool isEmptyVal<16>(uint8_t type, const uint8_t* ival) // For BINARY
{
const uint64_t* val = reinterpret_cast<const uint64_t*>(ival);
return ((val[0] == joblist::BINARYEMPTYROW) && (val[1] == joblist::BINARYEMPTYROW));
}
template<>
inline bool isEmptyVal<8>(uint8_t type, const uint8_t* ival)
{
@ -394,6 +409,21 @@ inline bool isEmptyVal<1>(uint8_t type, const uint8_t* ival)
template<int>
inline bool isNullVal(uint8_t type, const uint8_t* val8);
template<>
inline bool isNullVal<16>(uint8_t type, const uint8_t* ival) // For BINARY
{
const uint64_t* val = reinterpret_cast<const uint64_t*>(ival);
return ((val[0] == joblist::BINARYNULL) && (val[1] == joblist::BINARYNULL));
}
template<>
inline bool isNullVal<32>(uint8_t type, const uint8_t* ival) // For BINARY
{
const uint64_t* val = reinterpret_cast<const uint64_t*>(ival);
return ((val[0] == joblist::BINARYNULL) && (val[1] == joblist::BINARYNULL)
&& (val[2] == joblist::BINARYNULL) && (val[3] == joblist::BINARYNULL));
}
template<>
inline bool isNullVal<8>(uint8_t type, const uint8_t* ival)
{
@ -521,6 +551,12 @@ inline bool isNullVal(uint32_t length, uint8_t type, const uint8_t* val8)
{
switch (length)
{
case 32:
return isNullVal<32>(type, val8);
case 16:
return isNullVal<16>(type, val8);
case 8:
return isNullVal<8>(type, val8);
@ -703,6 +739,16 @@ inline void store(const NewColRequestHeader* in,
switch (in->DataSize)
{
case 32:
ptr2 += (rid << 5);
memcpy(ptr1, ptr2, 32);
break;
case 16:
ptr2 += (rid << 4);
memcpy(ptr1, ptr2, 16);
break;
default:
case 8:
ptr2 += (rid << 3);
@ -724,7 +770,6 @@ inline void store(const NewColRequestHeader* in,
memcpy(ptr1, ptr2, 1);
break;
}
*written += in->DataSize;
}
@ -811,6 +856,66 @@ inline uint64_t nextUnsignedColValue(int type,
return -1;
}
}
template<int W>
inline uint8_t* nextBinColValue(int type,
const uint16_t* ridArray,
int NVALS,
int* index,
bool* done,
bool* isNull,
bool* isEmpty,
uint16_t* rid,
uint8_t OutputType, uint8_t* val8, unsigned itemsPerBlk)
{
if (ridArray == NULL)
{
while (static_cast<unsigned>(*index) < itemsPerBlk &&
isEmptyVal<W>(type, &val8[*index * W]) &&
(OutputType & OT_RID))
{
(*index)++;
}
if (static_cast<unsigned>(*index) >= itemsPerBlk)
{
*done = true;
return NULL;
}
*rid = (*index)++;
}
else
{
//FIXME: not complete nor tested . How make execution flow pass here
// whe is ridArray not NULL ? fidn by id? how?
while (*index < NVALS &&
isEmptyVal<W>(type, &val8[ridArray[*index] * W]))
{
(*index)++;
}
if (*index >= NVALS)
{
*done = true;
return NULL;
}
*rid = ridArray[(*index)++];
}
*isNull = isNullVal<W>(type, val8);
*isEmpty = isEmptyVal<W>(type, val8);
//cout << "nextUnsignedColValue index " << *index << " rowid " << *rid << endl;
// at this point, nextRid is the index to return, and index is...
// if RIDs are not specified, nextRid + 1,
// if RIDs are specified, it's the next index in the rid array.
return &val8[*rid * W];
#ifdef PRIM_DEBUG
throw logic_error("PrimitiveProcessor::nextColBinValue() bad width");
#endif
return NULL;
}
}
template<int W>
inline int64_t nextColValue(int type,
@ -1426,6 +1531,225 @@ inline void p_Col_ridArray(NewColRequestHeader* in,
#endif
}
// for BINARY
template<int W>
inline void p_Col_bin_ridArray(NewColRequestHeader* in,
NewColResultHeader* out,
unsigned outSize,
unsigned* written, int* block, Stats* fStatsPtr, unsigned itemsPerBlk,
boost::shared_ptr<ParsedColumnFilter> parsedColumnFilter)
{
uint16_t* ridArray = 0;
uint8_t* in8 = reinterpret_cast<uint8_t*>(in);
const uint8_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + W;
idb_regex_t placeholderRegex;
placeholderRegex.used = false;
//FIXME: pCol is setting it to 8192 cause logicalBlockMode is true
if(itemsPerBlk == BLOCK_SIZE){
itemsPerBlk = BLOCK_SIZE/W;
}
if (in->NVALS > 0)
ridArray = reinterpret_cast<uint16_t*>(&in8[sizeof(NewColRequestHeader) +
(in->NOPS * filterSize)]);
if (ridArray && 1 == in->sort )
{
qsort(ridArray, in->NVALS, sizeof(uint16_t), compareBlock<uint16_t>);
if (fStatsPtr)
#ifdef _MSC_VER
fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'O');
#else
fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'O');
#endif
}
// Set boolean indicating whether to capture the min and max values.
out->ValidMinMax = isMinMaxValid(in);
if (out->ValidMinMax)
{
if (isUnsigned((CalpontSystemCatalog::ColDataType)in->DataType))
{
out->Min = static_cast<int64_t>(numeric_limits<uint64_t>::max());
out->Max = 0;
}
else
{
out->Min = numeric_limits<int64_t>::max();
out->Max = numeric_limits<int64_t>::min();
}
}
else
{
out->Min = 0;
out->Max = 0;
}
typedef char binWtype [W];
const ColArgs* args = NULL;
int64_t val = 0;
binWtype* bval;
int nextRidIndex = 0, argIndex = 0;
bool done = false, cmp = false, isNull = false, isEmpty = false;
uint16_t rid = 0;
prestored_set_t::const_iterator it;
binWtype* argVals = (binWtype*)alloca(in->NOPS * W);
uint8_t* std_cops = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t));
uint8_t* std_rfs = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t));
uint8_t* cops = NULL;
uint8_t* rfs = NULL;
scoped_array<idb_regex_t> std_regex;
idb_regex_t* regex = NULL;
uint8_t likeOps = 0;
// no pre-parsed column filter is set, parse the filter in the message
if (parsedColumnFilter.get() == NULL) {
std_regex.reset(new idb_regex_t[in->NOPS]);
regex = &(std_regex[0]);
cops = std_cops;
rfs = std_rfs;
for (argIndex = 0; argIndex < in->NOPS; argIndex++) {
args = reinterpret_cast<const ColArgs*> (&in8[sizeof (NewColRequestHeader) +
(argIndex * filterSize)]);
cops[argIndex] = args->COP;
rfs[argIndex] = args->rf;
memcpy(argVals[argIndex],args->val, W);
}
regex[argIndex].used = false;
}
// else we have a pre-parsed filter, and it's an unordered set for quick == comparisons
bval = (binWtype*)nextBinColValue<W>(in->DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull,
&isEmpty, &rid, in->OutputType, reinterpret_cast<uint8_t*>(block), itemsPerBlk);
while (!done)
{
// if((*((uint64_t *) (bval))) != 0)
// {
// cout << "rid "<< rid << " value ";
// if(W > 16) printf("%016X%016X ",( *(((uint64_t *) (bval)) +3)),(*(((uint64_t *) (bval)) +2)));
// printf("%016X%016X ",( *(((uint64_t *) (bval)) +1)),(*((uint64_t *) (bval))) );
//
// cout << endl;
// }
if (cops == NULL) // implies parsedColumnFilter && columnFilterMode == SET
{
/* bug 1920: ignore NULLs in the set and in the column data */
if (!(isNull && in->BOP == BOP_AND))
{
it = parsedColumnFilter->prestored_set->find(val);
if (in->BOP == BOP_OR)
{
// assume COP == COMPARE_EQ
if (it != parsedColumnFilter->prestored_set->end())
{
store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
}
}
else if (in->BOP == BOP_AND)
{
// assume COP == COMPARE_NE
if (it == parsedColumnFilter->prestored_set->end())
{
store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
}
}
}
}
else
{
for (argIndex = 0; argIndex < in->NOPS; argIndex++)
{
// if((*((uint64_t *) (uval))) != 0) cout << "comparing " << dec << (*((uint64_t *) (uval))) << " to " << (*((uint64_t *) (argVals[argIndex]))) << endl;
int val1 = memcmp(*bval, &argVals[argIndex], W);
switch (cops[argIndex]) {
case COMPARE_NIL:
cmp = false;
break;
case COMPARE_LT:
cmp = val1 < 0;
break;
case COMPARE_EQ:
cmp = val1 == 0;
break;
case COMPARE_LE:
cmp = val1 <= 0;
break;
case COMPARE_GT:
cmp = val1 > 0;
break;
case COMPARE_NE:
cmp = val1 != 0;
break;
case COMPARE_GE:
cmp = val1 >= 0;
break;
default:
logIt(34, cops[argIndex], "colCompare");
cmp = false; // throw an exception here?
}
// cout << cmp << endl;
if (in->NOPS == 1)
{
if (cmp == true)
{
store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
}
break;
}
else if (in->BOP == BOP_AND && cmp == false)
{
break;
}
else if (in->BOP == BOP_OR && cmp == true)
{
store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
break;
}
}
if ((argIndex == in->NOPS && in->BOP == BOP_AND) || in->NOPS == 0)
{
store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
}
}
bval = (binWtype*)nextBinColValue<W>(in->DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull,
&isEmpty, &rid, in->OutputType, reinterpret_cast<uint8_t*>(block), itemsPerBlk);
}
if (fStatsPtr)
#ifdef _MSC_VER
fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'K');
#else
fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'K');
#endif
} //namespace anon
namespace primitives
@ -1476,6 +1800,14 @@ void PrimitiveProcessor::p_Col(NewColRequestHeader* in, NewColResultHeader* out,
switch (in->DataSize)
{
case 32:
p_Col_bin_ridArray<32>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter);
break;
case 16:
p_Col_bin_ridArray<16>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter);
break;
case 8:
p_Col_ridArray<8>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter);
break;
@ -1579,6 +1911,8 @@ boost::shared_ptr<ParsedColumnFilter> parseColumnFilter
case 8:
ret->prestored_argVals[argIndex] = *reinterpret_cast<const uint64_t*>(args->val);
break;
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
}
}
else
@ -1614,6 +1948,8 @@ boost::shared_ptr<ParsedColumnFilter> parseColumnFilter
case 8:
ret->prestored_argVals[argIndex] = *reinterpret_cast<const int64_t*>(args->val);
break;
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
}
}

View File

@ -39,9 +39,9 @@
#include <cppunit/extensions/HelperMacros.h>
#include "primitiveprocessor.h"
using namespace primitives;
using namespace std;
int done;
void alarm_handler(int sig)
@ -87,7 +87,6 @@ class PrimTest : public CppUnit::TestFixture
CPPUNIT_TEST(p_IdxList_1);
CPPUNIT_TEST(p_IdxList_2);
// whole block tests
CPPUNIT_TEST(p_Col_1);
CPPUNIT_TEST(p_Col_2);
@ -163,6 +162,10 @@ class PrimTest : public CppUnit::TestFixture
// CPPUNIT_TEST(p_Dictionary_like_prefixbench_1);
// CPPUNIT_TEST(p_Dictionary_like_substrbench_1);
// binary data type
CPPUNIT_TEST(p_Col_bin_16);
CPPUNIT_TEST(p_Col_bin_32);
CPPUNIT_TEST_SUITE_END();
private:
@ -3745,6 +3748,178 @@ public:
close(fd);
}
template<uint8_t W> struct binary;
typedef binary<16> binary16;
typedef binary<32> binary32;
template<uint8_t W>
struct binary {
unsigned char data[W]; // May be ok for empty value ?
void operator=(uint64_t v) {*((uint64_t *) data) = v; memset(data + 8, 0, W - 8);}
inline uint8_t& operator[](const int index) {return *((uint8_t*) (data + index));}
inline uint64_t& uint64(const int index) {return *((uint64_t*) (data + (index << 3)));}
};
void p_Col_bin_16()
{
PrimitiveProcessor pp;
uint8_t input[BLOCK_SIZE], output[4 * BLOCK_SIZE], block[BLOCK_SIZE];
NewColRequestHeader* in;
NewColResultHeader* out;
ColArgs* args;
binary16* results;
uint32_t written, i;
int fd;
binary16 tmp;
binary16* bin16 = (binary16*) block;
for(int i = 0; i < BLOCK_SIZE/16; i++)
{
bin16[i] = 0;
}
bin16[0].uint64(0) = 10UL;
bin16[1].uint64(0) = 1000UL;
bin16[3].uint64(0) = 1000UL;
bin16[3].uint64(1) = 1;
bin16[4].uint64(0) = 256;
bin16[4].uint64(1) = 1;
typedef char bin16_t[16];
*(uint64_t*)(((bin16_t*)block) + 5) = 500;
*(uint64_t*)&((bin16_t*)block)[6] = 501;
memset(input, 0, BLOCK_SIZE);
memset(output, 0, 4 * BLOCK_SIZE);
in = reinterpret_cast<NewColRequestHeader*>(input);
out = reinterpret_cast<NewColResultHeader*>(output);
args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader)]);
in->DataSize = sizeof(binary16);
in->DataType = execplan::CalpontSystemCatalog::BINARY;
in->OutputType = OT_DATAVALUE;
in->NOPS = 3;
in->BOP = BOP_OR;
in->NVALS = 0;
tmp = 10;
args->COP = COMPARE_EQ;
memcpy(args->val, &tmp, in->DataSize);
args = reinterpret_cast<ColArgs*> (args->val + in->DataSize);
args->COP = COMPARE_EQ;
tmp = 1000;
memcpy(args->val, &tmp, in->DataSize);
args = reinterpret_cast<ColArgs*> (args->val + in->DataSize);
tmp.uint64(0) = 256;
tmp.uint64(1) = 1;
args->COP = COMPARE_EQ;
memcpy(args->val, &tmp, in->DataSize);
pp.setBlockPtr((int*) block);
pp.p_Col(in, out, 4 * BLOCK_SIZE, &written);
results = reinterpret_cast<binary16*>(&output[sizeof(NewColResultHeader)]);
// cout << "NVALS = " << out->NVALS << endl;
CPPUNIT_ASSERT_EQUAL((uint16_t)3, out->NVALS);
CPPUNIT_ASSERT_EQUAL((u_int64_t)10, results[0].uint64(0));
CPPUNIT_ASSERT_EQUAL((u_int64_t)1000, results[1].uint64(0));
for (i = 0; i < out->NVALS; i++) {
printf("Result %d Value %016X%016X\n",i ,results[i].uint64(1),results[i].uint64(0) );
// CPPUNIT_ASSERT(results[i] == (uint32_t) (i < 10 ? i : i - 10 + 1001));
}
}
void p_Col_bin_32()
{
PrimitiveProcessor pp;
uint8_t input[2 * BLOCK_SIZE], output[8 * BLOCK_SIZE], block[BLOCK_SIZE];
NewColRequestHeader* in;
NewColResultHeader* out;
ColArgs* args;
binary32* results;
uint32_t written, i;
int fd;
binary32 tmp;
binary32* bin32 = (binary32*) block;
for(int i = 0; i < BLOCK_SIZE/32; i++)
{
bin32[i].uint64(0) = 0;
}
bin32[0].uint64(0) = 10UL;
bin32[1].uint64(0) = 1000UL;
bin32[3].uint64(0) = 1000UL;
bin32[3].uint64(1) = 1;
bin32[4].uint64(0) = 256;
bin32[4].uint64(1) = 254;
bin32[4].uint64(2) = 253;
bin32[4].uint64(3) = 252;
typedef char bin32_t[32];
*(uint64_t*)(((bin32_t*)block) + 5) = 500;
*(uint64_t*)&((bin32_t*)block)[6] = 501;
memset(input, 0, BLOCK_SIZE);
memset(output, 0, 4 * BLOCK_SIZE);
in = reinterpret_cast<NewColRequestHeader*>(input);
out = reinterpret_cast<NewColResultHeader*>(output);
args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader)]);
in->DataSize = sizeof(binary32);
in->DataType = execplan::CalpontSystemCatalog::BINARY;
in->OutputType = OT_DATAVALUE;
in->NOPS = 3;
in->BOP = BOP_OR;
in->NVALS = 0;
tmp = 10;
args->COP = COMPARE_EQ;
memcpy(args->val, &tmp, in->DataSize);
args = reinterpret_cast<ColArgs*> (args->val + in->DataSize);
args->COP = COMPARE_EQ;
tmp = 1000;
memcpy(args->val, &tmp, in->DataSize);
args = reinterpret_cast<ColArgs*> (args->val + in->DataSize);
tmp.uint64(0) = 256;
tmp.uint64(1) = 254;
tmp.uint64(2) = 253;
tmp.uint64(3) = 252;
args->COP = COMPARE_EQ;
memcpy(args->val, &tmp, in->DataSize);
pp.setBlockPtr((int*) block);
pp.p_Col(in, out, 4 * BLOCK_SIZE, &written);
results = reinterpret_cast<binary32*>(&output[sizeof(NewColResultHeader)]);
// cout << "NVALS = " << out->NVALS << endl;
CPPUNIT_ASSERT_EQUAL((uint16_t)3, out->NVALS);
// CPPUNIT_ASSERT_EQUAL((u_int64_t)10, results[0].uint64(0));
// CPPUNIT_ASSERT_EQUAL((u_int64_t)1000, results[1].uint64(0));
for (i = 0; i < out->NVALS; i++) {
printf("Result %d Value %016X%016X%016X%016X\n",i ,results[i].uint64(3),results[i].uint64(2),results[i].uint64(1),results[i].uint64(0) );
// CPPUNIT_ASSERT(results[i] == (uint32_t) (i < 10 ? i : i - 10 + 1001));
}
}
void p_Dictionary_1()
{
PrimitiveProcessor pp;

View File

@ -273,6 +273,31 @@ void ColumnCommand::process_OT_BOTH()
/* this is verbose and repetative to minimize the work per row */
switch (colType.colWidth)
{
case 16:
for (i = 0, pos = sizeof(NewColResultHeader); i < outMsg->NVALS; ++i)
{
if (makeAbsRids)
bpp->absRids[i] = *((uint16_t*) &bpp->outputMsg[pos]) + bpp->baseRid;
bpp->relRids[i] = *((uint16_t*) &bpp->outputMsg[pos]);
pos += 2;
// values[i] is 8 Bytes wide so coping the pointer to bpp->outputMsg[pos] and crossing fingers
// I dont know the liveness of bpp->outputMsg but also I dont know if there is other memory area I can use
values[i] = (int64_t) &bpp->outputMsg[pos];
// cout<< "CC: BIN16 " << i << " "
// << hex
// << *((int64_t*)values[i])
// << " "
// << *(((int64_t*)values[i]) +1)
// << endl;
pos += 16;
}
break;
case 8:
for (i = 0, pos = sizeof(NewColResultHeader); i < outMsg->NVALS; ++i)
{
@ -346,6 +371,14 @@ void ColumnCommand::process_OT_DATAVALUE()
// cout << "rid Count is " << bpp->ridCount << endl;
switch (colType.colWidth)
{
case 16:
{
memcpy(values, outMsg + 1, outMsg->NVALS << 3);
cout << " CC: first value is " << values[0] << endl;
break;
}
case 8:
{
memcpy(values, outMsg + 1, outMsg->NVALS << 3);
@ -459,6 +492,9 @@ void ColumnCommand::createCommand(ByteStream& bs)
bs >> BOP;
bs >> filterCount;
deserializeInlineVector(bs, lastLbid);
// cout << __func__ << " colType.colWidth " << colType.colWidth << endl;
// cout << "lastLbid count=" << lastLbid.size() << endl;
// for (uint32_t i = 0; i < lastLbid.size(); i++)
// cout << " " << lastLbid[i];
@ -554,6 +590,11 @@ void ColumnCommand::prep(int8_t outputType, bool absRids)
shift = 1;
mask = 0x01;
break;
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix shift and mask for 16 Bytes ?"<< endl;
shift = 1;
mask = 0x01;
break;
default:
cout << "CC: colWidth is " << colType.colWidth << endl;
@ -751,6 +792,9 @@ void ColumnCommand::projectResultRG(RowGroup& rg, uint32_t pos)
break;
}
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
}
}

View File

@ -77,6 +77,9 @@ void PassThruCommand::project()
switch (colWidth)
{
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
case 8:
bpp->serialized->append((uint8_t*) bpp->values, bpp->ridCount << 3);
break;
@ -153,6 +156,21 @@ void PassThruCommand::projectIntoRowGroup(RowGroup& rg, uint32_t col)
}
break;
case 16:
cout << __FILE__ << ":" << __LINE__ << " PassThruCommand::projectIntoRowGroup" << " Addition for 16 Bytes" << endl;
for (i = 0; i < bpp->ridCount; i++)
{
cout << "PTC: " << "BIN16 " << i << " "
<< hex
<< *((int64_t*) bpp->values[i])
<< " "
<< *(((int64_t*) bpp->values[i]) +1)
<< endl;
// values[i] is 8 bytes so it contains the pointer to bpp->outputMsg set by ColumnCommand::process_OT_BOTH()
r.setBinaryField((uint8_t*)bpp->values[i], 16, offset);
r.nextRow(rowSize);
}
}
}

View File

@ -91,7 +91,8 @@ void PseudoCC::loadData()
case 8:
loadPMNumber<uint64_t>();
break;
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
default:
cout << "PC::loadData(): bad column width" << endl;
break;
@ -143,7 +144,8 @@ void PseudoCC::loadData()
case 8:
loadSegmentNum<uint64_t>();
break;
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
default:
cout << "PC::loadData(): bad column width" << endl;
break;
@ -170,6 +172,8 @@ void PseudoCC::loadData()
loadPartitionNum<uint64_t>();
break;
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
default:
cout << "PC::loadData(): bad column width" << endl;
break;
@ -196,6 +200,8 @@ void PseudoCC::loadData()
loadLBID<uint64_t>();
break;
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
default:
cout << "PC::loadData(): bad column width" << endl;
break;
@ -221,6 +227,8 @@ void PseudoCC::loadData()
case 8:
loadDBRootNum<uint64_t>();
break;
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
default:
cout << "PC::loadData(): bad column width" << endl;
@ -250,6 +258,8 @@ void PseudoCC::loadData()
case 8:
loadSingleValue<int64_t>(valueFromUM);
break;
case 16:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
default:
cout << "PC::loadData(): bad column width" << endl;

View File

@ -126,6 +126,9 @@ uint64_t getNullValue(CalpontSystemCatalog::ColDataType t, uint32_t colWidth)
case CalpontSystemCatalog::UBIGINT:
return joblist::UBIGINTNULL;
case CalpontSystemCatalog::BINARY:
return joblist::BINARYNULL;
case CalpontSystemCatalog::VARBINARY:
default:
ostringstream os;
@ -239,6 +242,9 @@ int64_t getSignedNullValue(CalpontSystemCatalog::ColDataType t, uint32_t colWidt
case CalpontSystemCatalog::LONGDOUBLE:
return (int64_t)joblist::LONGDOUBLENULL;
case CalpontSystemCatalog::BINARY:
return (int64_t)joblist::BINARYNULL;
case CalpontSystemCatalog::VARBINARY:
default:
ostringstream os;

View File

@ -1531,6 +1531,10 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType,
value = data;
break;
case CalpontSystemCatalog::BINARY:
value = data;
break;
default:
throw QueryDataExcept("convertColumnData: unknown column data type.", dataTypeErr);
break;
@ -1727,6 +1731,12 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType,
}
break;
case CalpontSystemCatalog::BINARY:
{
value = data;
}
break;
case CalpontSystemCatalog::UTINYINT:
{
uint8_t utinyintvalue = joblist::UTINYINTNULL;

View File

@ -130,6 +130,15 @@ string Func_hex::getStrVal(rowgroup::Row& row,
return string(hexPtr.get(), hexLen);
}
case CalpontSystemCatalog::BINARY:
{
const string& arg = parm[0]->data()->getStrVal(row, isNull);
uint64_t hexLen = arg.size() * 2;
scoped_array<char> hexPtr(new char[hexLen + 1]); // "+ 1" for the last \0
octet2hex(hexPtr.get(), arg.data(), arg.size());
return string(hexPtr.get(), hexLen);
}
default:
{
dec = (uint64_t)parm[0]->data()->getIntVal(row, isNull);

View File

@ -628,7 +628,8 @@ string Row::toString() const
os << " " << dec;
break;
}
case CalpontSystemCatalog::BINARY:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
default:
os << getIntField(i) << " ";
break;
@ -690,7 +691,8 @@ string Row::toCSV() const
os << dec;
break;
}
case CalpontSystemCatalog::BINARY:
std::cout << __FILE__<< __LINE__ << ":" << "toCSV"<< std::endl;
default:
os << getIntField(i);
break;
@ -852,7 +854,8 @@ void Row::initToNull()
case CalpontSystemCatalog::UBIGINT:
*((uint64_t*) &data[offsets[i]]) = joblist::UBIGINTNULL;
break;
case CalpontSystemCatalog::BINARY:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
default:
ostringstream os;
os << "Row::initToNull(): got bad column type (" << types[i] <<
@ -934,7 +937,8 @@ bool Row::isNullValue(uint32_t colIndex) const
case 8:
return
(*((uint64_t*) &data[offsets[colIndex]]) == joblist::CHAR8NULL);
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
default:
return (*((uint64_t*) &data[offsets[colIndex]]) == *((uint64_t*) joblist::CPNULLSTRMARK.c_str()));
}
@ -1004,6 +1008,16 @@ bool Row::isNullValue(uint32_t colIndex) const
return (*((long double*) &data[offsets[colIndex]]) == joblist::LONGDOUBLENULL);
break;
case CalpontSystemCatalog::BINARY:
{
// When is null? I dont know. Wait for bitmap null empty implemtenttion ?
// Also still pendig rework discussed use pointers for empty null values
std::cout << __FILE__<< ":" << __LINE__ << " isNullValue value " << (*((uint64_t*) &data[offsets[colIndex]])) << std::endl;
//return false;
return (*((uint64_t*) &data[offsets[colIndex]]) == joblist::BINARYEMPTYROW);
}
default:
{
ostringstream os;
@ -1624,6 +1638,7 @@ void RowGroup::addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList&
case 8:
cr->PutData(row.getUintField<8>(j));
break;
case 16:
default:
{
@ -1645,6 +1660,8 @@ void RowGroup::addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList&
cr->PutData(row.getUintField<4>(j));
break;
case CalpontSystemCatalog::BINARY:
std::cout << __FILE__<< __LINE__ << __func__<< std::endl;
default:
cr->PutData(row.getIntField<8>(j));
}

View File

@ -28,6 +28,7 @@
#ifndef ROWGROUP_H_
#define ROWGROUP_H_
#include <iostream>
#include <vector>
#include <string>
#include <stdexcept>
@ -421,7 +422,7 @@ public:
inline uint32_t getStringLength(uint32_t colIndex) const;
void setStringField(const std::string& val, uint32_t colIndex);
inline void setStringField(const uint8_t*, uint32_t len, uint32_t colIndex);
inline void setBinaryField(const uint8_t* strdata, uint32_t length, uint32_t offset);
// support VARBINARY
// Add 2-byte length at the CHARSET_INFO*beginning of the field. NULL and zero length field are
// treated the same, could use one of the length bit to distinguish these two cases.
@ -433,6 +434,8 @@ public:
inline const uint8_t* getVarBinaryField(uint32_t& len, uint32_t colIndex) const;
inline void setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex);
inline std::string getBinaryField(uint32_t colIndex) const;
inline boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t colIndex) const;
inline void setUserData(mcsv1sdk::mcsv1Context& context,
boost::shared_ptr<mcsv1sdk::UserData> userData,
@ -664,7 +667,8 @@ inline bool Row::equals(uint64_t val, uint32_t colIndex) const
case 8:
return *((uint64_t*) &data[offsets[colIndex]]) == val;
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
default:
idbassert(0);
throw std::logic_error("Row::equals(): bad length.");
@ -692,7 +696,8 @@ inline uint64_t Row::getUintField(uint32_t colIndex) const
case 8:
return *((uint64_t*) &data[offsets[colIndex]]);
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
default:
idbassert(0);
throw std::logic_error("Row::getUintField(): bad length.");
@ -711,7 +716,8 @@ inline uint64_t Row::getUintField(uint32_t colIndex) const
case 4:
return *((uint32_t*) &data[offsets[colIndex]]);
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
case 8:
return *((uint64_t*) &data[offsets[colIndex]]);
@ -784,6 +790,12 @@ inline uint32_t Row::getStringLength(uint32_t colIndex) const
return strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
}
inline void Row::setBinaryField(const uint8_t* strdata, uint32_t length, uint32_t offset)
{
memcpy(&data[offset], strdata, length);
}
inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_t colIndex)
{
uint64_t offset;
@ -817,6 +829,11 @@ inline std::string Row::getStringField(uint32_t colIndex) const
strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex)));
}
inline std::string Row::getBinaryField(uint32_t colIndex) const
{
return std::string((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
}
inline std::string Row::getVarBinaryStringField(uint32_t colIndex) const
{
if (inStringTable(colIndex))
@ -936,7 +953,10 @@ inline void Row::setUintField_offset(uint64_t val, uint32_t offset)
case 8:
*((uint64_t*) &data[offset]) = val;
break;
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
*((uint64_t*) &data[offset]) = val;
break;
default:
idbassert(0);
throw std::logic_error("Row::setUintField called on a non-uint32_t field");
@ -974,7 +994,10 @@ inline void Row::setUintField(uint64_t val, uint32_t colIndex)
case 8:
*((uint64_t*) &data[offsets[colIndex]]) = val;
break;
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
*((uint64_t*) &data[offsets[colIndex]]) = val;
break;
default:
idbassert(0);
throw std::logic_error("Row::setUintField called on a non-uint32_t field");
@ -1000,7 +1023,9 @@ inline void Row::setUintField(uint64_t val, uint32_t colIndex)
case 8:
*((uint64_t*) &data[offsets[colIndex]]) = val;
break;
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
*((uint64_t*) &data[offsets[colIndex]]) = val;
default:
idbassert(0);
throw std::logic_error("Row::setUintField: bad length");
@ -1027,7 +1052,8 @@ inline void Row::setIntField(int64_t val, uint32_t colIndex)
case 8:
*((int64_t*) &data[offsets[colIndex]]) = val;
break;
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
default:
idbassert(0);
throw std::logic_error("Row::setIntField: bad length");
@ -1053,7 +1079,8 @@ inline void Row::setIntField(int64_t val, uint32_t colIndex)
case 8:
*((int64_t*) &data[offsets[colIndex]]) = val;
break;
case 16:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
default:
idbassert(0);
throw std::logic_error("Row::setIntField: bad length");

View File

@ -808,6 +808,8 @@ TType TCompactProtocolT<Transport_>::getTType(int8_t type) {
case detail::compact::CT_STRUCT:
return T_STRUCT;
default:
cout << __FILE__<< __LINE__ << __func__<< endl;
throw TException(std::string("don't know what type: ") + (char)type);
}
return T_STOP;

View File

@ -19,6 +19,7 @@
//#define NDEBUG
#include <iostream>
#include <cassert>
#include <cmath>
#include <sstream>
@ -68,7 +69,8 @@ boost::shared_ptr<WindowFunctionType> WF_count<T>::makeFunction(int id, const st
break;
}
default:
case CalpontSystemCatalog::BINARY:
std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
{
func.reset(new WF_count<int64_t>(id, name));
break;

View File

@ -19,6 +19,7 @@
//#define NDEBUG
#include <iostream>
#include <cassert>
#include <cmath>
#include <sstream>
@ -490,7 +491,8 @@ bool WF_udaf::dropValues(int64_t b, int64_t e)
datum.columnData = valIn;
break;
}
case CalpontSystemCatalog::BINARY:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
default:
{
string errStr = "(" + colType2String[(int)datum.dataType] + ")";
@ -754,7 +756,8 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut,
setValue(colDataType, b, e, c, &strOut);
}
break;
case CalpontSystemCatalog::BINARY:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
default:
{
std::ostringstream errmsg;
@ -1101,7 +1104,8 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c)
datum.columnData = valIn;
break;
}
case CalpontSystemCatalog::BINARY:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
default:
{
string errStr = "(" + colType2String[(int)datum.dataType] + ")";

View File

@ -19,6 +19,7 @@
*/
//#define NDEBUG
#include <iostream>
#include <cassert>
#include <sstream>
#include <iomanip>
@ -741,7 +742,8 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos)
case CalpontSystemCatalog::LONGDOUBLE:
v = &longDoubleNull;
break;
case CalpontSystemCatalog::BINARY:
cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
case CalpontSystemCatalog::VARBINARY:
default:
std::ostringstream oss;

View File

@ -506,6 +506,16 @@ uint8_t WE_DDLCommandProc::writeCreateSyscolumn(ByteStream& bs, std::string& err
throw std::runtime_error(os.str());
}
else if (dataType == CalpontSystemCatalog::BINARY
&& ! (colDefPtr->fType->fLength == 16
|| colDefPtr->fType->fLength == 32))
{
ostringstream os;
os << "binary length may not be other than 16 or 32";
throw std::runtime_error(os.str());
}
unsigned int i = 0;
column_iterator = columns.begin();

View File

@ -496,6 +496,10 @@ inline int convertDataType(int dataType)
calpontDataType = execplan::CalpontSystemCatalog::UDOUBLE;
break;
case ddlpackage::DDL_BINARY:
calpontDataType = execplan::CalpontSystemCatalog::BINARY;
break;
default:
throw runtime_error("Unsupported datatype!");

View File

@ -91,7 +91,8 @@ CPPUNIT_TEST(setUp);
// Extent & dict related testing
CPPUNIT_TEST( testExtensionWOPrealloc );
CPPUNIT_TEST( testDictExtensionWOPrealloc );
// Semaphore related testing
CPPUNIT_TEST( testExtentCrWOPreallocBin );
// Semaphore related testing
// CPPUNIT_TEST( testSem );
// Log related testing
@ -1542,6 +1543,179 @@ public:
}
*/
template<uint8_t W> struct binary;
typedef binary<16> binary16;
typedef binary<32> binary32;
template<uint8_t W>
struct binary {
unsigned char data[W]; // May be ok for empty value ?
void operator=(uint64_t v) {*((uint64_t *) data) = v; memset(data + 8, 0, W - 8);}
inline uint8_t& operator[](const int index) {return *((uint8_t*) (data + index));}
inline uint64_t& uint64(const int index) {return *((uint64_t*) (data + (index << 3)));}
};
void testExtentCrWOPreallocBin() {
IDBDataFile* pFile = NULL;
ColumnOpCompress1 fileOp;
BlockOp blockOp;
char fileName[20];
int rc;
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
int dbRoot = 1;
idbdatafile::IDBPolicy::init(true, false, "", 0);
// Set to versionbuffer to satisfy IDBPolicy::getType
strcpy(fileName, "versionbuffer");
fileOp.compressionType(1);
fileOp.deleteFile(fileName);
CPPUNIT_ASSERT(fileOp.exists(fileName) == false);
//binary16 emptyVal = blockOp.getEmptyBinRowValue( execplan::CalpontSystemCatalog::BINARY, 16 );
uint64_t emptyVal = blockOp.getEmptyRowValue(execplan::CalpontSystemCatalog::BIGINT, 8);
int width = blockOp.getCorrectRowWidth(execplan::CalpontSystemCatalog::BINARY, sizeof (binary16));
int nBlocks = INITIAL_EXTENT_ROWS_TO_DISK / BYTE_PER_BLOCK * width;
// createFile runs IDBDataFile::open + initAbrevCompColumnExtent
// under the hood
// bigint column file
rc = fileOp.createFile(fileName,
nBlocks, // number of blocks
emptyVal, // NULL value
width, // width
dbRoot); // dbroot
CPPUNIT_ASSERT(rc == NO_ERROR);
fileOp.closeFile(pFile);
// open created compressed file and check its header
pFile = IDBDataFile::open(IDBPolicy::getType(fileName,
IDBPolicy::WRITEENG), fileName, "rb", dbRoot);
rc = pFile->seek(0, 0);
CPPUNIT_ASSERT(rc == NO_ERROR);
rc = fileOp.readHeaders(pFile, hdrs);
CPPUNIT_ASSERT(rc == NO_ERROR);
// Couldn't use IDBDataFile->close() here w/o excplicit cast
fileOp.closeFile(pFile);
// Extend the extent up to 64MB
pFile = IDBDataFile::open(IDBPolicy::getType(fileName,
IDBPolicy::WRITEENG), fileName, "rb", dbRoot);
// disable disk space preallocation
idbdatafile::IDBPolicy::setPreallocSpace(dbRoot);
rc = fileOp.initColumnExtent(pFile,
dbRoot,
BYTE_PER_BLOCK - nBlocks, // number of blocks
emptyVal,
width,
false, // use existing file
false, // don't expand; new extent
false, // add full (not abbreviated) extent
true); // optimize extention
CPPUNIT_ASSERT(rc == NO_ERROR);
fileOp.closeFile(pFile);
// file has been extended
cout << endl << "file has been extended";
// write up to INITIAL_EXTENT_ROWS_TO_DISK + 1 rows into the file
Column curCol;
binary16 valArray[INITIAL_EXTENT_ROWS_TO_DISK + 1];
RID rowIdArray[INITIAL_EXTENT_ROWS_TO_DISK + 1];
// This is the magic for the stub in FileOp::oid2FileName
int fid = 42;
for (uint64_t it = 0; it <= INITIAL_EXTENT_ROWS_TO_DISK; it++) {
rowIdArray[it] = it;
valArray[it].uint64(0) = it + 3;
valArray[it].uint64(1) = it + 5;
}
fileOp.initColumn(curCol);
fileOp.setColParam(curCol,
1, // column number
width,
execplan::CalpontSystemCatalog::BINARY,
WriteEngine::WR_BINARY,
fid,
1); //compression type
string segFile;
// openColumnFile uses DBRM's oid server but we
// have to get the chunks' pointers from the header.
curCol.dataFile.pFile = fileOp.openFile(
curCol,
dbRoot,
0,
0,
segFile,
false,
"r+b",
BYTE_PER_BLOCK * BYTE_PER_BLOCK); // buffer size is 64MB
CPPUNIT_ASSERT(rc == NO_ERROR);
rc = fileOp.writeRow(curCol, INITIAL_EXTENT_ROWS_TO_DISK + 1,
(RID*) rowIdArray, valArray);
CPPUNIT_ASSERT_EQUAL(NO_ERROR, rc); // I prefer this way as it prints values
// flush and close the file used for reading
fileOp.clearColumn(curCol);
std::map<uint32_t, uint32_t> oids;
oids[fid] = fid;
// flush changed chunks from the Manager
int rtn1 = fileOp.chunkManager()->flushChunks(rc, oids);
// read back the file
cout << endl << "Read file ";
DataBlock block;
binary16* bin16 = (binary16*) block.data;
fileOp.initColumn(curCol);
fileOp.setColParam(curCol,
1, // column number
width,
execplan::CalpontSystemCatalog::BINARY,
WriteEngine::WR_BINARY,
fid,
1); //compression type
curCol.dataFile.pFile = fileOp.openFile(
curCol,
dbRoot,
0,
0,
segFile,
false,
"r+b",
BYTE_PER_BLOCK * BYTE_PER_BLOCK); // buffer size is 64MB
CPPUNIT_ASSERT_EQUAL(NO_ERROR, rc);
int blocks = fileOp.blocksInFile(curCol.dataFile.pFile);
for (int b = 0; b < blocks; b++) {
rc = fileOp.chunkManager()->readBlock(curCol.dataFile.pFile, block.data, b); // ColumnOpCompress1.readBlock() is protected so ...
CPPUNIT_ASSERT_EQUAL(NO_ERROR, rc);
//cout << endl << bin16[0].uint64(0);
CPPUNIT_ASSERT_EQUAL(b * 512UL + 3, bin16[0].uint64(0)); // Checking just first value of each block as it was written before
CPPUNIT_ASSERT_EQUAL(b * 512UL + 5, bin16[0].uint64(1));
}
fileOp.clearColumn(curCol);
fileOp.closeFile(curCol.dataFile.pFile); // Seems done by clearColumn, but anyways...
cout << endl << "Delete file ";
fileOp.deleteFile(fileName);
CPPUNIT_ASSERT(fileOp.exists(fileName) == false);
cout << endl << "End of test";
}
void testCleanup()
{

View File

@ -160,6 +160,10 @@ uint64_t BlockOp::getEmptyRowValue(
emptyVal = joblist::UBIGINTEMPTYROW;
break;
case CalpontSystemCatalog::BINARY :
emptyVal = joblist::BINARYEMPTYROW;
break;
case CalpontSystemCatalog::CHAR :
case CalpontSystemCatalog::VARCHAR :
case CalpontSystemCatalog::DATE :
@ -267,9 +271,11 @@ void BlockOp::setEmptyBuf(
// Optimize buffer initialization by constructing and copying in an array
// instead of individual values. This reduces the number of calls to
// memcpy().
for (int j = 0; j < ARRAY_COUNT; j++)
int w = width > 8 ? 8: width;
for(uint8_t* pos = emptyValArray, * end = pos + NBYTES_IN_ARRAY; pos < end; pos += w) //FIXME for no loop
{
memcpy(emptyValArray + (j * width), &emptyVal, width);
memcpy(pos, &emptyVal, w);
}
int countFull128 = (bufSize / width) / ARRAY_COUNT;

View File

@ -435,6 +435,11 @@ void Convertor::convertColType(CalpontSystemCatalog::ColDataType dataType,
internalType = WriteEngine::WR_ULONGLONG;
break;
// Map BINARY to WR_BINARY
case CalpontSystemCatalog::BINARY:
internalType = WriteEngine::WR_BINARY;
break;
default:
internalType = WriteEngine::WR_CHAR;
break;
@ -683,6 +688,11 @@ void Convertor::convertColType(ColStruct* curStruct)
*internalType = WriteEngine::WR_ULONGLONG;
break;
// Map BINARY to WR_BINARY
case CalpontSystemCatalog::BINARY:
*internalType = WriteEngine::WR_BINARY;
break;
default:
*internalType = WriteEngine::WR_CHAR;
break;
@ -773,6 +783,10 @@ int Convertor::getCorrectRowWidth(CalpontSystemCatalog::ColDataType dataType, in
newWidth = 8;
break;
case CalpontSystemCatalog::BINARY:
newWidth = width;
break;
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::VARBINARY: // treat same as varchar for now

View File

@ -108,9 +108,10 @@ enum ColType /** @brief Column type enumeration*/
WR_USHORT = 14, /** @brief Unsigned Short */
WR_UINT = 15, /** @brief Unsigned Int */
WR_ULONGLONG = 16, /** @brief Unsigned Long long*/
WR_TEXT = 17, /** @brief TEXT */
WR_TEXT = 17, /** @brief TEXT */
WR_MEDINT = 18, /** @brief Medium Int */
WR_UMEDINT = 19 /** @brief Unsigned Medium Int */
WR_UMEDINT = 19, /** @brief Unsigned Medium Int */
WR_BINARY = 20 /** @brief BINARY */
};
// Describes relation of field to column for a bulk load

View File

@ -123,6 +123,8 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent,
newFile = false;
Column newCol;
unsigned char buf[BYTE_PER_BLOCK];
unsigned char* curVal;
int64_t emptyVal = getEmptyRowValue(column.colDataType, column.colWidth); // Seems is ok have it here and just once
if (useStartingExtent)
{
@ -138,9 +140,9 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent,
if ( rc != NO_ERROR)
return rc;
for (j = 0; j < totalRowPerBlock; j++)
for (j = 0, curVal = buf; j < totalRowPerBlock; j++, curVal += column.colWidth)
{
if (isEmptyRow(buf, j, column))
if (isEmptyRow((uint64_t*)curVal, emptyVal, column.colWidth))
{
rowIdArray[counter] = getRowId(hwm, column.colWidth, j);
rowsallocated++;
@ -192,9 +194,9 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent,
}
}
for (j = 0; j < totalRowPerBlock; j++)
for (j = 0, curVal = buf; j < totalRowPerBlock; j++, curVal += column.colWidth)
{
if (isEmptyRow(buf, j, column))
if (isEmptyRow((uint64_t*)curVal, emptyVal, column.colWidth))
{
rowIdArray[counter] = getRowId(hwm, column.colWidth, j);
rowsallocated++;
@ -492,9 +494,9 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent,
}
}
for (j = 0; j < totalRowPerBlock; j++)
for (j = 0, curVal = buf; j < totalRowPerBlock; j++, curVal += column.colWidth)
{
if (isEmptyRow(buf, j, column))
if (isEmptyRow((uint64_t*)curVal, emptyVal, column.colWidth)) // Why to check it if beacause line 483 is always true ?
{
rowIdArray[counter] = getRowId(newHwm, column.colWidth, j);
rowsallocated++;
@ -533,9 +535,9 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent,
}
}
for (j = 0; j < totalRowPerBlock; j++)
for (j = 0, curVal = buf; j < totalRowPerBlock; j++, curVal += column.colWidth)
{
if (isEmptyRow(buf, j, column))
if (isEmptyRow((uint64_t*)curVal, emptyVal, column.colWidth))
{
rowIdArray[counter] = getRowId(newHwm, newCol.colWidth, j);
rowsallocated++;
@ -1080,7 +1082,8 @@ int ColumnOp::fillColumn(const TxnID& txnid, Column& column, Column& refCol, voi
}
else if (column.compressionType != 0) //@Bug 3866, fill the empty row value for compressed chunk
{
memcpy(colBuf + colBufOffset, &emptyVal, column.colWidth);
for(int b = 0, w = column.colWidth; b < column.colWidth; b += 8, w = 8) //FIXME for no loop!
memcpy(colBuf + colBufOffset + b, &emptyVal, w);
dirty = true;
}
@ -1405,18 +1408,39 @@ void ColumnOp::initColumn(Column& column) const
* RETURN:
* true if success, false otherwise
***********************************************************/
bool ColumnOp::isEmptyRow(unsigned char* buf, int offset, const Column& column)
// It is called at just 4 places on allocRowId() but all the time inside extend scanning loops
inline bool ColumnOp::isEmptyRow(uint64_t* curVal, uint64_t emptyVal, const int colWidth)
{
bool emptyFlag = true;
uint64_t curVal, emptyVal;
//Calling it here makes calling it "i" times from the calling loop at allocRowId()
//uint64_t emptyVal = getEmptyRowValue(column.colDataType, column.colWidth);
memcpy(&curVal, buf + offset * column.colWidth, column.colWidth);
emptyVal = getEmptyRowValue(column.colDataType, column.colWidth);
// No need for it if change param type.. just been lazy to add extra castings
//uint64_t &emptyVal = column.emptyVal;
if (/*curVal != emptyVal*/memcmp(&curVal, &emptyVal, column.colWidth))
emptyFlag = false;
//no need to multiply over and over if just increment the pointer on the caller
//uint64_t *curVal = (uint64_t*)(buf + offset * column.colWidth);
return emptyFlag;
switch(colWidth){
case 1:
return *(uint8_t*)curVal == emptyVal;
case 2:
return *(uint16_t*)curVal == emptyVal;
case 4:
return *(uint32_t*)curVal == emptyVal;
case 8:
return *curVal == emptyVal;
case 16:
return ((curVal[0] == emptyVal) && (curVal[1] == emptyVal));
case 32:
return ((curVal[0] == emptyVal) && (curVal[1] == emptyVal)
&& (curVal[2] == emptyVal) && (curVal[3] == emptyVal));
}
}
/***********************************************************
@ -1662,6 +1686,12 @@ int ColumnOp::writeRow(Column& curCol, uint64_t totalRow, const RID* rowIdArray,
if (!bDelete) pVal = &((uint64_t*) valArray)[i];
break;
case WriteEngine::WR_BINARY:
if (!bDelete) pVal = (uint8_t*) valArray + i * curCol.colWidth;
//pOldVal = (uint8_t*) oldValArray + i * curCol.colWidth;
break;
default :
if (!bDelete) pVal = &((int*) valArray)[i];
break;
@ -1852,7 +1882,8 @@ int ColumnOp::writeRows(Column& curCol, uint64_t totalRow, const RIDList& ridLis
}
// This is the write stuff
writeBufValue(dataBuf + dataBio, pVal, curCol.colWidth);
for(int b = 0, w = curCol.colWidth > 8 ? 8 : curCol.colWidth; b < curCol.colWidth; b += 8) //FIXME for no loop
writeBufValue(dataBuf + dataBio + b, pVal, w);
i++;

View File

@ -220,7 +220,7 @@ public:
/**
* @brief Check whether it is an empty row
*/
EXPORT virtual bool isEmptyRow(unsigned char* buf, int offset, const Column& column);
EXPORT virtual bool isEmptyRow(uint64_t* curVal, uint64_t emptyVal, const int colWidth);
/**
* @brief Check whether it is a valid column

View File

@ -389,6 +389,15 @@ void WriteEngineWrapper::convertValue(const ColType colType, void* value, boost:
}
break;
case WriteEngine::WR_BINARY:
{
char val = boost::any_cast<char>(data);
//TODO:FIXME how to determine size ? 16, 32,48 ?
size = 16;
memcpy(value, &val, size);
}
break;
} // end of switch (colType)
} /*@convertValue - The base for converting values */
@ -492,6 +501,12 @@ void WriteEngineWrapper::convertValue(const ColType colType, void* valArray, con
case WriteEngine::WR_TOKEN:
((Token*)valArray)[pos] = boost::any_cast<Token>(data);
break;
case WriteEngine::WR_BINARY:
curStr = boost::any_cast<string>(data);
memcpy((char*)valArray + pos * curStr.length(), curStr.c_str(), curStr.length());
break;
} // end of switch (colType)
}
else
@ -557,6 +572,16 @@ void WriteEngineWrapper::convertValue(const ColType colType, void* valArray, con
case WriteEngine::WR_TOKEN:
data = ((Token*)valArray)[pos];
break;
case WriteEngine::WR_BINARY :
{
char tmp[16];
//TODO:FIXME how to determine size ? 16, 32,48 ?
memcpy(tmp, (char*)valArray + pos * 16, 16);
curStr = tmp;
data = curStr;
}
break;
} // end of switch (colType)
} // end of if
}
@ -3234,7 +3259,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid,
}
bool newFile;
cout << "Datafile " << curCol.dataFile.fSegFileName << endl;
#ifdef PROFILE
timer.start("allocRowId");
#endif
@ -5130,6 +5155,10 @@ int WriteEngineWrapper::writeColumnRec(const TxnID& txnid,
case WriteEngine::WR_TOKEN:
valArray = (Token*) calloc(sizeof(Token), totalRow1);
break;
case WriteEngine::WR_BINARY:
valArray = calloc(colStructList[i].colWidth, totalRow1);
break;
}
// convert values to valArray
@ -5349,6 +5378,11 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid,
tmp16 = curValue;
((uint16_t*)valArray)[j] = tmp16;
break;
case WriteEngine::WR_BINARY:
((uint64_t*)valArray)[j] = curValue; //FIXME maybe
break;
}
}
@ -5492,6 +5526,10 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid,
tmp16 = curValue;
((uint16_t*)valArray)[j] = tmp16;
break;
case WriteEngine::WR_BINARY:
((uint64_t*)valArray)[j] = curValue; // FIXME maybe
break;
}
}
@ -5775,6 +5813,9 @@ int WriteEngineWrapper::writeColumnRec(const TxnID& txnid,
case WriteEngine::WR_TOKEN:
valArray = (Token*) calloc(sizeof(Token), 1);
break;
case WriteEngine::WR_BINARY:
valArray = (char*) calloc(sizeof(char), curColStruct.colWidth); //FIXME maybe
break;
}
// convert values to valArray