diff --git a/datatypes/mcs_datatype.cpp b/datatypes/mcs_datatype.cpp index d8bc951c3..efcc5f4f4 100644 --- a/datatypes/mcs_datatype.cpp +++ b/datatypes/mcs_datatype.cpp @@ -524,7 +524,7 @@ int TypeHandlerStr::storeValueToFieldCharVarchar(rowgroup::Row &row, int pos, return f->store_string(tmp, strlen(tmp)); } default: - return f->store_string((const char*)row.getStringPointer(pos), row.getStringLength(pos)); + return f->storeConstString(row.getConstString(pos)); } } diff --git a/datatypes/mcs_datatype.h b/datatypes/mcs_datatype.h index fd47bc66a..c257e9b11 100644 --- a/datatypes/mcs_datatype.h +++ b/datatypes/mcs_datatype.h @@ -21,6 +21,7 @@ #include #include #include "exceptclasses.h" +#include "conststring.h" #include "mcs_numeric_limits.h" #include "mcs_data_condition.h" #include "mcs_decimal.h" @@ -909,6 +910,11 @@ public: virtual int store_decimal64(const datatypes::Decimal& dec) = 0; virtual int store_decimal128(const datatypes::Decimal& dec) = 0; virtual int store_lob(const char *str, size_t length) = 0; + + int storeConstString(const utils::ConstString &str) + { + return store_string(str.str(), str.length()); + } }; diff --git a/dbcon/execplan/aggregatecolumn.cpp b/dbcon/execplan/aggregatecolumn.cpp index 675f6cefb..54e7299a2 100644 --- a/dbcon/execplan/aggregatecolumn.cpp +++ b/dbcon/execplan/aggregatecolumn.cpp @@ -408,16 +408,19 @@ void AggregateColumn::evaluate(Row& row, bool& isNull) break; default: - if (row.equals(CPNULLSTRMARK, fInputIndex)) + { + auto const str = row.getConstString(fInputIndex); + if (str.eq(utils::ConstString(CPNULLSTRMARK))) isNull = true; else - fResult.strVal = row.getStringField(fInputIndex); + fResult.strVal = str.toString(); // stringColVal is padded with '\0' to colWidth so can't use str.length() if (strlen(fResult.strVal.c_str()) == 0) isNull = true; break; + } } if (fResultType.colDataType == CalpontSystemCatalog::STRINT) diff --git a/dbcon/execplan/windowfunctioncolumn.cpp b/dbcon/execplan/windowfunctioncolumn.cpp index afb1562fd..57a32a95a 100644 --- a/dbcon/execplan/windowfunctioncolumn.cpp +++ b/dbcon/execplan/windowfunctioncolumn.cpp @@ -494,16 +494,19 @@ void WindowFunctionColumn::evaluate(Row& row, bool& isNull) case 16: //fallthrough default: - if (row.equals(CPNULLSTRMARK, fInputIndex)) + { + const auto str = row.getConstString(fInputIndex); + if (str.eq(utils::ConstString(CPNULLSTRMARK))) isNull = true; else - fResult.strVal = row.getStringField(fInputIndex); + fResult.strVal = str.toString(); // stringColVal is padded with '\0' to colWidth so can't use str.length() if (strlen(fResult.strVal.c_str()) == 0) isNull = true; break; + } } if (fResultType.colDataType == CalpontSystemCatalog::STRINT) diff --git a/dbcon/joblist/groupconcat.cpp b/dbcon/joblist/groupconcat.cpp index 09174b381..91aa4e3b4 100644 --- a/dbcon/joblist/groupconcat.cpp +++ b/dbcon/joblist/groupconcat.cpp @@ -375,7 +375,7 @@ void GroupConcatAgUM::applyMapping(const boost::shared_array& mapping, cons fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::VARCHAR || fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT) { - fRow.setStringField(row.getStringPointer(mapping[i]), row.getStringLength(mapping[i]), i); + fRow.setStringField(row.getConstString(mapping[i]), i); } else if (fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::LONGDOUBLE) { @@ -621,8 +621,7 @@ int64_t GroupConcator::lengthEstimate(const rowgroup::Row& row) case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::TEXT: { - int64_t colWidth = row.getStringLength(*i); - fieldLen += colWidth; // getStringLength() does the same thing as below + fieldLen += row.getConstString(*i).length(); break; } diff --git a/primitives/primproc/dictstep.cpp b/primitives/primproc/dictstep.cpp index fd8c3013d..2981e4ff8 100644 --- a/primitives/primproc/dictstep.cpp +++ b/primitives/primproc/dictstep.cpp @@ -578,7 +578,7 @@ void DictStep::_projectToRG(RowGroup& rg, uint32_t col) { rg.getRow(newRidList[i].pos, &r); //cout << "serializing " << tmpStrings[i] << endl; - r.setStringField(tmpStrings[i].ptr, tmpStrings[i].len, col); + r.setStringField(tmpStrings[i].getConstString(), col); } } else diff --git a/primitives/primproc/dictstep.h b/primitives/primproc/dictstep.h index 6af6d2a95..0eb37ca7c 100644 --- a/primitives/primproc/dictstep.h +++ b/primitives/primproc/dictstep.h @@ -83,6 +83,10 @@ private: StringPtr() : ptr(NULL), len(0) {;} StringPtr(const uint8_t* p, unsigned l) : ptr(p), len(l) {;} + utils::ConstString getConstString() const + { + return utils::ConstString((const char *) ptr, len); + } }; void _execute(); diff --git a/utils/common/conststring.h b/utils/common/conststring.h index f7fd890e9..685900aac 100644 --- a/utils/common/conststring.h +++ b/utils/common/conststring.h @@ -38,10 +38,18 @@ public: const char *str() const { return mStr; } const char *end() const { return mStr + mLength; } size_t length() const { return mLength; } + std::string toString() const + { + return std::string(mStr, mLength); + } bool eq(char ch) const { return mLength == 1 && mStr[0] == ch; } + bool eq(const ConstString &rhs) const + { + return mLength == rhs.mLength && !memcmp(mStr, rhs.mStr, mLength); + } ConstString & rtrimZero() { for ( ; mLength && mStr[mLength - 1] == '\0'; mLength--) diff --git a/utils/joiner/tuplejoiner.cpp b/utils/joiner/tuplejoiner.cpp index bdd142ecc..880d4662c 100644 --- a/utils/joiner/tuplejoiner.cpp +++ b/utils/joiner/tuplejoiner.cpp @@ -1228,6 +1228,9 @@ public: TypelessDataStringEncoder(const uint8_t *str, uint32_t length) :mStr(str), mLength(length) { } + TypelessDataStringEncoder(const utils::ConstString &str) + :mStr((const uint8_t*) str.str()), mLength(str.length()) + { } bool store(uint8_t* to, uint32_t& off, uint32_t keylen) const { if (mLength > 0xFFFF) // We encode length into two bytes below @@ -1563,9 +1566,8 @@ TypelessData makeTypelessKey(const Row& r, const vector& keyCols, if (datatypes::isCharType(type)) { // this is a string, copy a normalized version - const uint8_t* str = r.getStringPointer(keyCols[i]); - uint32_t width = r.getStringLength(keyCols[i]); - if (TypelessDataStringEncoder(str, width).store(ret.data, off, keylen)) + const utils::ConstString str = r.getConstString(keyCols[i]); + if (TypelessDataStringEncoder(str).store(ret.data, off, keylen)) goto toolong; } else if (datatypes::isWideDecimalType(type, r.getColumnWidth(keyCols[i]))) @@ -1683,15 +1685,10 @@ uint64_t getHashOfTypelessKey(const Row& r, const vector& keyCols, uin type == CalpontSystemCatalog::TEXT) { // this is a string, copy a normalized version - const uint8_t* str = r.getStringPointer(keyCols[i]); - uint32_t len = r.getStringLength(keyCols[i]); - ret = hasher((const char*) str, len, ret); - /* - for (uint32_t j = 0; j < width && str[j] != 0; j++) - ret.data[off++] = str[j]; - */ + const utils::ConstString str = r.getConstString(keyCols[i]); + ret = hasher(str.str(), str.length(), ret); ret = hasher(&nullChar, 1, ret); - width += len + 1; + width += str.length() + 1; } else if (r.getColType(keyCols[i]) == CalpontSystemCatalog::LONGDOUBLE) { diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 3c47d210a..e87cb55df 100755 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -388,7 +388,8 @@ inline bool RowAggregation::isNull(const RowGroup* pRowGroup, const Row& row, in else { //@bug 1821 - ret = (row.equals(string(""), col) || row.equals(joblist::CPNULLSTRMARK, col)); + auto const str = row.getConstString(col); + ret = str.length() == 0 || str.eq(utils::ConstString(joblist::CPNULLSTRMARK)); } break; @@ -486,7 +487,8 @@ inline bool RowAggregation::isNull(const RowGroup* pRowGroup, const Row& row, in case execplan::CalpontSystemCatalog::VARBINARY: case execplan::CalpontSystemCatalog::BLOB: { - ret = (row.equals(string(""), col) || row.equals(joblist::CPNULLSTRMARK, col)); + auto const str = row.getConstString(col); + ret = str.length() == 0 || str.eq(utils::ConstString(joblist::CPNULLSTRMARK)); break; } @@ -949,8 +951,7 @@ void RowAggregation::initMapData(const Row& rowIn) } else { - fRow.setStringField(rowIn.getStringPointer(colIn), - rowIn.getStringLength(colIn), colOut); + fRow.setStringField(rowIn.getConstString(colIn), colOut); } break; diff --git a/utils/rowgroup/rowgroup.cpp b/utils/rowgroup/rowgroup.cpp index 3266b257e..60688a327 100644 --- a/utils/rowgroup/rowgroup.cpp +++ b/utils/rowgroup/rowgroup.cpp @@ -346,45 +346,6 @@ void UserDataStore::deserialize(ByteStream& bs) return; } -inline bool StringStore::equals(const std::string& str, uint64_t off, CHARSET_INFO* cs) const -{ - uint32_t length; - - if (off == std::numeric_limits::max()) - return str == joblist::CPNULLSTRMARK; - - MemChunk* mc; - - if (off & 0x8000000000000000) - { - if (longStrings.size() <= (off & ~0x8000000000000000)) - return false; - - mc = (MemChunk*) longStrings[off & ~0x8000000000000000].get(); - - memcpy(&length, mc->data, 4); - - // Not sure if this check it needed, but adds safety - if (length > mc->currentSize) - return false; - - return (cs->strnncoll(str.c_str(), str.length(), (const char*)mc->data+4, length) == 0); - } - - uint32_t chunk = off / CHUNK_SIZE; - uint32_t offset = off % CHUNK_SIZE; - - if (mem.size() <= chunk) - return false; - - mc = (MemChunk*) mem[chunk].get(); - memcpy(&length, &mc->data[offset], 4); - - if ((offset + length) > mc->currentSize) - return false; - - return (cs->strnncoll(str.c_str(), str.length(), (const char*)&mc->data[offset]+4, length) == 0); -} RGData::RGData() { @@ -598,8 +559,10 @@ string Row::toString(uint32_t rownum) const case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: { - const string& tmp = getStringField(i); - os << "(" << getStringLength(i) << ") '" << tmp << "' "; + const utils::ConstString tmp = getConstString(i); + os << "(" << tmp.length() << ") '"; + os.write(tmp.str(), tmp.length()); + os << "' "; break; } @@ -1112,24 +1075,6 @@ int64_t Row::getSignedNullValue(uint32_t colIndex) const return utils::getSignedNullValue(types[colIndex], getColumnWidth(colIndex)); } -bool Row::equals(const std::string& val, uint32_t col) const -{ - const CHARSET_INFO* cs = getCharset(col); - if (UNLIKELY(getColType(col) == execplan::CalpontSystemCatalog::BLOB)) - { - if (getStringLength(col) != val.length()) - return false; - - if (memcmp(getStringPointer(col), val.c_str(), val.length())) - return false; - } - else - { - return (cs->strnncollsp((char*)getStringPointer(col), getStringLength(col), - val.c_str(), val.length()) == 0); - } - return true; -} bool Row::equals(const Row& r2, uint32_t lastCol) const { @@ -1151,19 +1096,15 @@ bool Row::equals(const Row& r2, uint32_t lastCol) const cscDataType columnType = getColType(col); if (UNLIKELY(typeHasCollation(columnType))) { - CHARSET_INFO* cs = getCharset(col); - if (cs->strnncollsp(getStringPointer(col), getStringLength(col), - r2.getStringPointer(col), r2.getStringLength(col))) + datatypes::Charset cs(getCharset(col)); + if (cs.strnncollsp(getConstString(col), r2.getConstString(col))) { return false; } } else if (UNLIKELY(columnType == execplan::CalpontSystemCatalog::BLOB)) { - if (getStringLength(col) != r2.getStringLength(col)) - return false; - - if (memcmp(getStringPointer(col), r2.getStringPointer(col), getStringLength(col))) + if (!getConstString(col).eq(r2.getConstString(col))) return false; } else @@ -1573,7 +1514,7 @@ void applyMapping(const int* mapping, const Row& in, Row* out) in.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT)) out->setVarBinaryField(in.getVarBinaryField(i), in.getVarBinaryLength(i), mapping[i]); else if (UNLIKELY(in.isLongString(i))) - out->setStringField(in.getStringPointer(i), in.getStringLength(i), mapping[i]); + out->setStringField(in.getConstString(i), mapping[i]); else if (UNLIKELY(in.isShortString(i))) out->setUintField(in.getUintField(i), mapping[i]); else if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::LONGDOUBLE)) diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 79a0f4b29..4f9401036 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -159,7 +159,6 @@ public: inline bool isEmpty() const; inline uint64_t getSize() const; inline bool isNullValue(uint64_t offset) const; - bool equals(const std::string& str, uint64_t offset, CHARSET_INFO* cs) const; void clear(); @@ -427,7 +426,6 @@ public: inline bool equals(T* value, uint32_t colIndex) const; template inline bool equals(uint64_t val, uint32_t colIndex) const; inline bool equals(long double val, uint32_t colIndex) const; - bool equals(const std::string& val, uint32_t colIndex) const; inline bool equals(const int128_t& val, uint32_t colIndex) const; inline double getDoubleField(uint32_t colIndex) const; @@ -480,14 +478,16 @@ public: inline void setRid(uint64_t rid); - // is string efficient for this? - inline std::string getStringField(uint32_t colIndex) const; - inline const uint8_t* getStringPointer(uint32_t colIndex) const; - inline uint32_t getStringLength(uint32_t colIndex) const; + // TODO: remove this (string is not efficient for this), use getConstString() instead + inline std::string getStringField(uint32_t colIndex) const + { + return getConstString(colIndex).toString(); + } + inline utils::ConstString getConstString(uint32_t colIndex) const; inline utils::ConstString getShortConstString(uint32_t colIndex) const; void setStringField(const std::string& val, uint32_t colIndex); - inline void setStringField(const uint8_t*, uint32_t len, uint32_t colIndex); + inline void setStringField(const utils::ConstString &str, uint32_t colIndex); template inline void setBinaryField(const T* value, uint32_t width, uint32_t colIndex); template @@ -687,7 +687,7 @@ inline uint32_t Row::getRealSize() const if (!inStringTable(i)) ret += getColumnWidth(i); else - ret += getStringLength(i); + ret += getConstString(i).length(); } return ret; @@ -879,21 +879,6 @@ inline int64_t Row::getIntField(uint32_t colIndex) const } } -inline const uint8_t* Row::getStringPointer(uint32_t colIndex) const -{ - if (inStringTable(colIndex)) - return strings->getPointer(*((uint64_t*) &data[offsets[colIndex]])); - - return &data[offsets[colIndex]]; -} - -inline uint32_t Row::getStringLength(uint32_t colIndex) const -{ - if (inStringTable(colIndex)) - return strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]])); - - return strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex)); -} template inline void Row::setBinaryField(const T* value, uint32_t width, uint32_t colIndex) @@ -1040,16 +1025,18 @@ inline void Row::colUpdateHasherTypeless(datatypes::MariaDBHasher &h, uint32_t k } } -inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_t colIndex) +inline void Row::setStringField(const utils::ConstString &str, uint32_t colIndex) { uint64_t offset; + // TODO: add multi-byte safe truncation here + uint32_t length = str.length(); if (length > getColumnWidth(colIndex)) length = getColumnWidth(colIndex); if (inStringTable(colIndex)) { - offset = strings->storeString(strdata, length); + offset = strings->storeString((const uint8_t*) str.str(), length); *((uint64_t*) &data[offsets[colIndex]]) = offset; // cout << " -- stored offset " << *((uint32_t *) &data[offsets[colIndex]]) // << " length " << *((uint32_t *) &data[offsets[colIndex] + 4]) @@ -1057,22 +1044,12 @@ inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_ } else { - memcpy(&data[offsets[colIndex]], strdata, length); + memcpy(&data[offsets[colIndex]], str.str(), length); memset(&data[offsets[colIndex] + length], 0, offsets[colIndex + 1] - (offsets[colIndex] + length)); } } -inline std::string Row::getStringField(uint32_t colIndex) const -{ - if (inStringTable(colIndex)) - return strings->getString(*((uint64_t*) &data[offsets[colIndex]])); - - // Not all CHAR/VARCHAR are NUL terminated so use length - return std::string((char*) &data[offsets[colIndex]], - strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex))); -} - template inline T* Row::getBinaryField(uint32_t colIndex) const { @@ -1094,7 +1071,7 @@ inline T* Row::getBinaryField_offset(uint32_t offset) const inline std::string Row::getVarBinaryStringField(uint32_t colIndex) const { if (inStringTable(colIndex)) - return getStringField(colIndex); + return getConstString(colIndex).toString(); return std::string((char*) &data[offsets[colIndex] + 2], *((uint16_t*) &data[offsets[colIndex]])); } @@ -1440,7 +1417,7 @@ inline void Row::copyField(Row& out, uint32_t destIndex, uint32_t srcIndex) cons } else if (UNLIKELY(isLongString(srcIndex))) { - out.setStringField(getStringPointer(srcIndex), getStringLength(srcIndex), destIndex); + out.setStringField(getConstString(srcIndex), destIndex); } else if (UNLIKELY(isShortString(srcIndex))) { @@ -2079,7 +2056,7 @@ inline void copyRow(const Row& in, Row* out, uint32_t colCount) } else if (UNLIKELY(in.isLongString(i))) { - out->setStringField(in.getStringPointer(i), in.getStringLength(i), i); + out->setStringField(in.getConstString(i), i); } else if (UNLIKELY(in.isShortString(i))) { diff --git a/utils/windowfunction/idborderby.cpp b/utils/windowfunction/idborderby.cpp index bc5598db5..156cd4def 100644 --- a/utils/windowfunction/idborderby.cpp +++ b/utils/windowfunction/idborderby.cpp @@ -327,15 +327,14 @@ int StringCompare::operator()(IdbCompare* l, Row::Pointer r1, Row::Pointer r2) } else { - int len1 = l->row1().getStringLength(fSpec.fIndex); - int len2 = l->row2().getStringLength(fSpec.fIndex); - const char* s1 = (const char*)l->row1().getStringPointer(fSpec.fIndex); - const char* s2 = (const char*)l->row2().getStringPointer(fSpec.fIndex); + auto const str1 = l->row1().getConstString(fSpec.fIndex); + auto const str2 = l->row2().getConstString(fSpec.fIndex); if (!cs) cs = l->rowGroup()->getCharset(fSpec.fIndex); - ret = fSpec.fAsc * cs->strnncollsp(s1, len1, s2, len2); + ret = fSpec.fAsc * cs->strnncollsp(str1.str(), str1.length(), + str2.str(), str2.length()); } return ret;