/* Copyright (C) 2014 InfiniDB, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /**************************************************************************** * $Id: dataconvert.h 3693 2013-04-05 16:11:30Z chao $ * * ****************************************************************************/ /** @file */ #ifndef DATACONVERT_H #define DATACONVERT_H #include #include #include #include #ifdef _MSC_VER #include #include #include #else #include #endif #include #include "calpontsystemcatalog.h" #include "columnresult.h" #include "exceptclasses.h" // remove this block if the htonll is defined in library #ifdef __linux__ #include #if __BYTE_ORDER == __BIG_ENDIAN // 4312 inline uint64_t htonll(uint64_t n) { return n; } #elif __BYTE_ORDER == __LITTLE_ENDIAN // 1234 inline uint64_t htonll(uint64_t n) { return ((((uint64_t) htonl(n & 0xFFFFFFFFLLU)) << 32) | (htonl((n & 0xFFFFFFFF00000000LLU) >> 32))); } #else // __BYTE_ORDER == __PDP_ENDIAN 3412 inline uint64_t htonll(uint64_t n); // don't know 34127856 or 78563412, hope never be required to support this byte order. #endif #else //!__linux__ #if _MSC_VER < 1600 //Assume we're on little-endian inline uint64_t htonll(uint64_t n) { return ((((uint64_t) htonl(n & 0xFFFFFFFFULL)) << 32) | (htonl((n & 0xFFFFFFFF00000000ULL) >> 32))); } #endif //_MSC_VER #endif //__linux__ // this method evalutes the uint64 that stores a char[] to expected value inline uint64_t uint64ToStr(uint64_t n) { return htonll(n); } #if defined(_MSC_VER) && defined(xxxDATACONVERT_DLLEXPORT) #define EXPORT __declspec(dllexport) #else #define EXPORT #endif const int64_t IDB_pow[19] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000LL, 100000000000LL, 1000000000000LL, 10000000000000LL, 100000000000000LL, 1000000000000000LL, 10000000000000000LL, 100000000000000000LL, 1000000000000000000LL }; namespace dataconvert { enum CalpontDateTimeFormat { CALPONTDATE_ENUM = 1, // date format is: "YYYY-MM-DD" CALPONTDATETIME_ENUM = 2 // date format is: "YYYY-MM-DD HH:MI:SS" }; /** @brief a structure to hold a date */ struct Date { unsigned spare : 6; unsigned day : 6; unsigned month : 4; unsigned year : 16; // NULL column value = 0xFFFFFFFE Date( ) : spare(0x3E), day(0x3F), month(0xF), year(0xFFFF) {} // Construct a Date from a 64 bit integer Calpont date. Date(uint64_t val) : spare(0x3E), day((val >> 6) & 077), month((val >> 12) & 0xF), year((val >> 16)) {} // Construct using passed in parameters, no value checking Date(unsigned y, unsigned m, unsigned d) : spare(0x3E), day(d), month(m), year(y) {} int32_t convertToMySQLint() const; }; inline int32_t Date::convertToMySQLint() const { return (int32_t) (year * 10000) + (month * 100) + day; } /** @brief a structure to hold a datetime */ struct DateTime { unsigned msecond : 20; unsigned second : 6; unsigned minute : 6; unsigned hour : 6; unsigned day : 6; unsigned month : 4; unsigned year : 16; // NULL column value = 0xFFFFFFFFFFFFFFFE DateTime( ) : msecond(0xFFFFE), second(0x3F), minute(0x3F), hour(0x3F), day(0x3F), month(0xF), year(0xFFFF) {} // Construct a DateTime from a 64 bit integer Calpont datetime. DateTime(uint64_t val) : msecond(val & 0xFFFFF), second((val >> 20) & 077), minute((val >> 26) & 077), hour((val >> 32) & 077), day((val >> 38) & 077), month((val >> 44) & 0xF), year(val >> 48) {} // Construct using passed in parameters, no value checking DateTime(unsigned y, unsigned m, unsigned d, unsigned h, unsigned min, unsigned sec, unsigned msec) : msecond(msec), second(sec), minute(min), hour(h), day(d), month(m), year(y) {} int64_t convertToMySQLint() const; void reset(); }; inline int64_t DateTime::convertToMySQLint() const { return (int64_t) (year * 10000000000LL) + (month * 100000000) + (day * 1000000) + (hour * 10000) + (minute * 100) + second; } inline void DateTime::reset() { msecond = 0xFFFFE; second = 0x3F; minute = 0x3F; hour = 0x3F; day = 0x3F; month = 0xF; year = 0xFFFF; } /** @brief a structure to hold a time * range: -838:59:59 ~ 838:59:59 */ struct Time { signed msecond : 24; signed second : 8; signed minute : 8; signed hour : 12; signed day : 12; // NULL column value = 0xFFFFFFFFFFFFFFFE Time() : msecond (0xFFFFFE), second (0xFF), minute (0xFF), hour (0xFFF), day (0xFFF) {} // Construct a Time from a 64 bit integer InfiniDB time. Time(int64_t val) : msecond(val & 0xffffff), second((val >> 24) & 0xff), minute((val >> 32) & 0xff), hour((val >> 40) & 0xfff), day((val >> 52) & 0xfff) {} }; static uint32_t daysInMonth[13] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 0}; inline uint32_t getDaysInMonth(uint32_t month) { return ( (month < 1 || month > 12) ? 0 : daysInMonth[month - 1]); } inline bool isLeapYear ( int year) { if ( year % 400 == 0 ) return true; if ( ( year % 4 == 0 ) && ( year % 100 != 0 ) ) return true; return false; } inline bool isDateValid ( int day, int month, int year) { bool valid = true; if ( year == 0 && month == 0 && year == 0 ) { return true; } int daycheck = getDaysInMonth( month ); if ( month == 2 && isLeapYear( year ) ) // 29 days in February in a leap year daycheck = 29; if ( ( year < 1000 ) || ( year > 9999 ) ) valid = false; else if ( month < 1 || month > 12 ) valid = false; else if ( day < 1 || day > daycheck ) valid = false; return ( valid ); } inline bool isDateTimeValid ( int hour, int minute, int second, int microSecond) { bool valid = false; if ( hour >= 0 && hour <= 24 ) { if ( minute >= 0 && minute < 60 ) { if ( second >= 0 && second < 60 ) { if ( microSecond >= 0 && microSecond <= 999999 ) { valid = true; } } } } return valid; } inline int64_t string_to_ll( const std::string& data, bool& bSaturate ) { // This function doesn't take into consideration our special values // for NULL and EMPTY when setting the saturation point. Should it? char* ep = NULL; const char* str = data.c_str(); errno = 0; int64_t value = strtoll(str, &ep, 10); // (no digits) || (more chars) || (other errors & value = 0) if ((ep == str) || (*ep != '\0') || (errno != 0 && value == 0)) throw logging::QueryDataExcept("value is not numerical.", logging::formatErr); if (errno == ERANGE && (value == std::numeric_limits::max() || value == std::numeric_limits::min())) bSaturate = true; return value; } inline uint64_t string_to_ull( const std::string& data, bool& bSaturate ) { // This function doesn't take into consideration our special values // for NULL and EMPTY when setting the saturation point. Should it? char* ep = NULL; const char* str = data.c_str(); errno = 0; // check for negative number. saturate to 0; if (data.find('-') != data.npos) { bSaturate = true; return 0; } uint64_t value = strtoull(str, &ep, 10); // (no digits) || (more chars) || (other errors & value = 0) if ((ep == str) || (*ep != '\0') || (errno != 0 && value == 0)) throw logging::QueryDataExcept("value is not numerical.", logging::formatErr); if (errno == ERANGE && (value == std::numeric_limits::max())) bSaturate = true; return value; } /** @brief DataConvert is a component for converting string data to Calpont format */ class DataConvert { public: /** * @brief convert a columns data, represnted as a string, to it's native * format * * @param type the columns data type * @param data the columns string representation of it's data */ EXPORT static boost::any convertColumnData( const execplan::CalpontSystemCatalog::ColType& colType, const std::string& dataOrig, bool& bSaturate, bool nulFlag = false, bool noRoundup = false, bool isUpdate = false); /** * @brief convert a columns data from native format to a string * * @param type the columns database type * @param data the columns string representation of it's data */ EXPORT static std::string dateToString( int datevalue ); static inline void dateToString( int datevalue, char* buf, unsigned int buflen ); /** * @brief convert a columns data from native format to a string * * @param type the columns database type * @param data the columns string representation of it's data */ EXPORT static std::string datetimeToString( long long datetimevalue ); static inline void datetimeToString( long long datetimevalue, char* buf, unsigned int buflen ); /** * @brief convert a columns data from native format to a string * * @param type the columns database type * @param data the columns string representation of it's data */ EXPORT static std::string dateToString1( int datevalue ); static inline void dateToString1( int datevalue, char* buf, unsigned int buflen ); /** * @brief convert a columns data from native format to a string * * @param type the columns database type * @param data the columns string representation of it's data */ EXPORT static std::string datetimeToString1( long long datetimevalue ); static inline void datetimeToString1( long long datetimevalue, char* buf, unsigned int buflen ); /** * @brief convert a date column data, represnted as a string, to it's native * format. This function is for bulkload to use. * * @param type the columns data type * @param dataOrig the columns string representation of it's data * @param dateFormat the format the date value in * @param status 0 - success, -1 - fail * @param dataOrgLen length specification of dataOrg */ EXPORT static int32_t convertColumnDate( const char* dataOrg, CalpontDateTimeFormat dateFormat, int& status, unsigned int dataOrgLen ); /** * @brief Is specified date valid; used by binary bulk load */ EXPORT static bool isColumnDateValid( int32_t date ); /** * @brief convert a datetime column data, represented as a string, * to it's native format. This function is for bulkload to use. * * @param type the columns data type * @param dataOrig the columns string representation of it's data * @param datetimeFormat the format the date value in * @param status 0 - success, -1 - fail * @param dataOrgLen length specification of dataOrg */ EXPORT static int64_t convertColumnDatetime( const char* dataOrg, CalpontDateTimeFormat datetimeFormat, int& status, unsigned int dataOrgLen ); /** * @brief Is specified datetime valid; used by binary bulk load */ EXPORT static bool isColumnDateTimeValid( int64_t dateTime ); EXPORT static bool isNullData(execplan::ColumnResult* cr, int rownum, execplan::CalpontSystemCatalog::ColType colType); static inline std::string decimalToString(int64_t value, uint8_t scale, execplan::CalpontSystemCatalog::ColDataType colDataType); static inline void decimalToString(int64_t value, uint8_t scale, char* buf, unsigned int buflen, execplan::CalpontSystemCatalog::ColDataType colDataType); static inline std::string constructRegexp(const std::string& str); static inline bool isEscapedChar(char c) { return ('%' == c || '_' == c); } // convert string to date EXPORT static int64_t stringToDate(const std::string& data); // convert string to datetime EXPORT static int64_t stringToDatetime(const std::string& data, bool* isDate = NULL); // convert integer to date EXPORT static int64_t intToDate(int64_t data); // convert integer to datetime EXPORT static int64_t intToDatetime(int64_t data, bool* isDate = NULL); // convert string to date. alias to stringToDate EXPORT static int64_t dateToInt(const std::string& date); // convert string to datetime. alias to datetimeToInt EXPORT static int64_t datetimeToInt(const std::string& datetime); EXPORT static int64_t stringToTime (const std::string& data); // bug4388, union type conversion EXPORT static execplan::CalpontSystemCatalog::ColType convertUnionColType(std::vector&); }; inline void DataConvert::dateToString( int datevalue, char* buf, unsigned int buflen) { snprintf( buf, buflen, "%04d-%02d-%02d", (unsigned)((datevalue >> 16) & 0xffff), (unsigned)((datevalue >> 12) & 0xf), (unsigned)((datevalue >> 6) & 0x3f) ); } inline void DataConvert::datetimeToString( long long datetimevalue, char* buf, unsigned int buflen ) { snprintf( buf, buflen, "%04d-%02d-%02d %02d:%02d:%02d", (unsigned)((datetimevalue >> 48) & 0xffff), (unsigned)((datetimevalue >> 44) & 0xf), (unsigned)((datetimevalue >> 38) & 0x3f), (unsigned)((datetimevalue >> 32) & 0x3f), (unsigned)((datetimevalue >> 26) & 0x3f), (unsigned)((datetimevalue >> 20) & 0x3f) ); } inline void DataConvert::dateToString1( int datevalue, char* buf, unsigned int buflen) { snprintf( buf, buflen, "%04d%02d%02d", (unsigned)((datevalue >> 16) & 0xffff), (unsigned)((datevalue >> 12) & 0xf), (unsigned)((datevalue >> 6) & 0x3f) ); } inline void DataConvert::datetimeToString1( long long datetimevalue, char* buf, unsigned int buflen ) { snprintf( buf, buflen, "%04d%02d%02d%02d%02d%02d", (unsigned)((datetimevalue >> 48) & 0xffff), (unsigned)((datetimevalue >> 44) & 0xf), (unsigned)((datetimevalue >> 38) & 0x3f), (unsigned)((datetimevalue >> 32) & 0x3f), (unsigned)((datetimevalue >> 26) & 0x3f), (unsigned)((datetimevalue >> 20) & 0x3f) ); } inline std::string DataConvert::decimalToString(int64_t value, uint8_t scale, execplan::CalpontSystemCatalog::ColDataType colDataType) { char buf[80]; DataConvert::decimalToString(value, scale, buf, 80, colDataType); return std::string(buf); } inline void DataConvert::decimalToString(int64_t int_val, uint8_t scale, char* buf, unsigned int buflen, execplan::CalpontSystemCatalog::ColDataType colDataType) { // Need to convert a string with a binary unsigned number in it to a 64-bit signed int // MySQL seems to round off values unless we use the string store method. Groan. // Taken from ha_calpont_impl.cpp //biggest Calpont supports is DECIMAL(18,x), or 18 total digits+dp+sign for column // Need 19 digits maxium to hold a sum result of 18 digits decimal column. if (isUnsigned(colDataType)) { #ifndef __LP64__ snprintf(buf, buflen, "%llu", static_cast(int_val)); #else snprintf(buf, buflen, "%lu", static_cast(int_val)); #endif } else { #ifndef __LP64__ snprintf(buf, buflen, "%lld", int_val); #else snprintf(buf, buflen, "%ld", int_val); #endif } if (scale == 0) return; //we want to move the last dt_scale chars right by one spot to insert the dp //we want to move the trailing null as well, so it's really dt_scale+1 chars size_t l1 = strlen(buf); char* ptr = &buf[0]; if (int_val < 0) { ptr++; idbassert(l1 >= 2); l1--; } //need to make sure we have enough leading zeros for this to work... //at this point scale is always > 0 size_t l2 = 1; if ((unsigned)scale > l1) { const char* zeros = "00000000000000000000"; //20 0's size_t diff = 0; if (int_val != 0) diff = scale - l1; //this will always be > 0 else diff = scale; memmove((ptr + diff), ptr, l1 + 1); //also move null memcpy(ptr, zeros, diff); if (int_val != 0) l1 = 0; else l1 = 1; } else if ((unsigned)scale == l1) { l1 = 0; l2 = 2; } else { l1 -= scale; } memmove((ptr + l1 + l2), (ptr + l1), scale + 1); //also move null if (l2 == 2) *(ptr + l1++) = '0'; *(ptr + l1) = '.'; } //FIXME: copy/pasted from dictionary.cpp: refactor inline std::string DataConvert::constructRegexp(const std::string& str) { //In the worst case, every char is quadrupled, plus some leading/trailing cruft... char* cBuf = (char*)alloca(((4 * str.length()) + 3) * sizeof(char)); char c; uint32_t i, cBufIdx = 0; // translate to regexp symbols cBuf[cBufIdx++] = '^'; // implicit leading anchor for (i = 0; i < str.length(); i++) { c = (char) str.c_str()[i]; switch (c) { // chars to substitute case '%': cBuf[cBufIdx++] = '.'; cBuf[cBufIdx++] = '*'; break; case '_': cBuf[cBufIdx++] = '.'; break; // escape the chars that are special in regexp's but not in SQL // default special characters in perl: .[{}()\*+?|^$ case '.': case '*': case '^': case '$': case '?': case '+': case '|': case '[': case '{': case '}': case '(': case ')': cBuf[cBufIdx++] = '\\'; cBuf[cBufIdx++] = c; break; case '\\': //this is the sql escape char if ( i + 1 < str.length()) { if (isEscapedChar(str.c_str()[i + 1])) { cBuf[cBufIdx++] = str.c_str()[++i]; break; } else if ('\\' == str.c_str()[i + 1]) { cBuf[cBufIdx++] = c; cBuf[cBufIdx++] = str.c_str()[++i]; break; } } //single slash cBuf[cBufIdx++] = '\\'; cBuf[cBufIdx++] = c; break; default: cBuf[cBufIdx++] = c; } } cBuf[cBufIdx++] = '$'; // implicit trailing anchor cBuf[cBufIdx++] = '\0'; #ifdef VERBOSE cerr << "regexified string is " << cBuf << endl; #endif return cBuf; } } // namespace dataconvert #undef EXPORT #endif //DATACONVERT_H