You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-10-31 18:30:33 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			941 lines
		
	
	
		
			30 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			941 lines
		
	
	
		
			30 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* Copyright (C) 2014 InfiniDB, Inc.
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or
 | |
|    modify it under the terms of the GNU General Public License
 | |
|    as published by the Free Software Foundation; version 2 of
 | |
|    the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | |
|    MA 02110-1301, USA. */
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * $Id: we_convertor.cpp 4726 2013-08-07 03:38:36Z bwilkinson $
 | |
|  *
 | |
|  *******************************************************************************/
 | |
| /** @file */
 | |
| 
 | |
| #include <unistd.h>
 | |
| #include <fnmatch.h>
 | |
| #include <limits>
 | |
| #include <cstring>
 | |
| #include <vector>
 | |
| #include "mcsconfig.h"
 | |
| 
 | |
| #include "we_convertor.h"
 | |
| #include "numericliteral.h"
 | |
| 
 | |
| using namespace std;
 | |
| using namespace execplan;
 | |
| namespace
 | |
| {
 | |
| const char DATE_TIME_FORMAT[] = "%04d-%02d-%02d %02d:%02d:%02d";
 | |
| // ColumnStore file `full file name` format.
 | |
| const char CS_FULL_FILENAME_FORMAT[] =
 | |
|     "*[0-9][0-9][0-9].dir/[0-9][0-9][0-9].dir/[[0-9][0-9][0-9].dir/"
 | |
|     "[0-9][0-9][0-9].dir/[0-9][0-9][0-9].dir/FILE[0-9][0-9][0-9].cdf";
 | |
| // ColumnStore file `directory name` format.
 | |
| const char CS_DIR_FORMAT[] = "[0-9][0-9][0-9].dir";
 | |
| // ColumnStore file `file name` format.
 | |
| const char CS_FILE_FORMAT[] = "FILE[0-9][0-9][0-9].cdf";
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Takes an 8-bit value and converts it into a directory name.
 | |
|  * PARAMETERS:
 | |
|  *    pBuffer(output) - a pointer to the output buffer
 | |
|  *    blen   (input)  - the length of the output buffer (in bytes)
 | |
|  *    val    (input)  - value to be used in the formatted name
 | |
|  * RETURN:
 | |
|  *    the number of characters printed in the output buffer (not including
 | |
|  *    the null terminator).  -1 is returned if the input buffer pointer is NULL.
 | |
|  ******************************************************************************/
 | |
| int _doDir(char* pBuffer, int blen, unsigned int val)
 | |
| {
 | |
|   int rc;
 | |
| 
 | |
|   if (!pBuffer)
 | |
|   {
 | |
|     rc = -1;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     rc = snprintf(pBuffer, blen, "%03u.dir", val);
 | |
|     pBuffer[blen - 1] = (char)0;
 | |
|   }
 | |
| 
 | |
|   return rc;
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Takes an 8-bit value and converts it into a file name.
 | |
|  * PARAMETERS:
 | |
|  *    pBuffer(output) - a pointer to the output buffer
 | |
|  *    blen   (input)  - the length of the output buffer (in bytes)
 | |
|  *    val    (input)  - value to be used in the formatted name
 | |
|  * RETURN:
 | |
|  *    the number of characters printed in the output buffer (not including
 | |
|  *    the null terminator).  -1 is returned if the input buffer pointer is NULL.
 | |
|  ******************************************************************************/
 | |
| int _doFile(char* pBuffer, int blen, unsigned char val)
 | |
| {
 | |
|   int rc;
 | |
| 
 | |
|   if (!pBuffer)
 | |
|   {
 | |
|     rc = -1;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     rc = snprintf(pBuffer, blen, "FILE%03d.cdf", val);
 | |
|     pBuffer[blen - 1] = (char)0;
 | |
|   }
 | |
| 
 | |
|   return rc;
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Takes a buffer in ColumnStore `text` format and converts it to an
 | |
|  *    integer.
 | |
|  * PARAMETERS:
 | |
|  *    buffer(input)  - a pointer to the input buffer.
 | |
|  *    fmt   (input)  - a pointer to the format buffer.
 | |
|  *    offset(input)  - an offset.
 | |
|  *    val   (output) - converted integer.
 | |
|  * RETURN:
 | |
|  *   0 is returned on success, -1 is returned on error.
 | |
|  ******************************************************************************/
 | |
| int32_t _fromText(const char* buffer, const char* fmt, uint32_t offset, uint32_t& val)
 | |
| {
 | |
|   int32_t rc = -1;
 | |
|   // Number length in characters.
 | |
|   const uint32_t numberLen = 3;
 | |
| 
 | |
|   // Check that buffer is in the correct `directory` format.
 | |
|   if (buffer && (fnmatch(fmt, buffer, 0) == 0))
 | |
|   {
 | |
|     datatypes::DataCondition error;
 | |
|     val = literal::UnsignedInteger(buffer + offset, numberLen).toXIntPositive<uint32_t>(error);
 | |
|     // The number cannot exceed 0xff.
 | |
|     if (!error && val < 256)
 | |
|       rc = 0;
 | |
|   }
 | |
| 
 | |
|   return rc;
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Takes a buffer in ColumnStore `directory` format and converts it to an
 | |
|  *    integer.
 | |
|  * PARAMETERS:
 | |
|  *    buffer(input)  - a pointer to the input buffer.
 | |
|       val   (output) - converted integer.
 | |
|  * RETURN:
 | |
|  *   0 is returned on success, -1 is returned on error.
 | |
|  ******************************************************************************/
 | |
| int32_t _fromDir(const char* buffer, uint32_t& val)
 | |
| {
 | |
|   return _fromText(buffer, CS_DIR_FORMAT, 0, val);
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Takes a buffer in ColumnStore `file` format and converts it to an
 | |
|  *    integer.
 | |
|  * PARAMETERS:
 | |
|  *    buffer(input)  - a pointer to the input buffer.
 | |
|       val   (output) - converted integer.
 | |
|  * RETURN:
 | |
|  *   0 is returned on success, -1 is returned on error.
 | |
|  ******************************************************************************/
 | |
| int32_t _fromFile(const char* buffer, uint32_t& val)
 | |
| {
 | |
|   return _fromText(buffer, CS_FILE_FORMAT, 4, val);
 | |
| }
 | |
| 
 | |
| }  // namespace
 | |
| 
 | |
| namespace WriteEngine
 | |
| {
 | |
| // This struct represents a path to a segment file.
 | |
| struct Convertor::dmFilePathArgs_t
 | |
| {
 | |
|   struct dmFilePathPart_t
 | |
|   {
 | |
|     char* pName;  // Name buffer.
 | |
|     int32_t len;  // Size in bytes of a buffer.
 | |
|   };
 | |
|   // A, B, C, D, E and file name.
 | |
|   dmFilePathPart_t pathPart[6];
 | |
| 
 | |
|   // Initialize `dmFilePathArgs_t` from the given buffer.
 | |
|   void initialize(char buff[6][MAX_DB_DIR_NAME_SIZE])
 | |
|   {
 | |
|     // A, B, C, D, E directories.
 | |
|     for (uint32_t i = 0; i < 6; ++i)
 | |
|     {
 | |
|       pathPart[i].pName = buff[i];
 | |
|       pathPart[i].len = sizeof(buff[i]);
 | |
|     }
 | |
|   }
 | |
| };
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Get time string
 | |
|  * PARAMETERS:
 | |
|  *    none
 | |
|  * RETURN:
 | |
|  *    time string
 | |
|  ******************************************************************************/
 | |
| /* static */
 | |
| const std::string Convertor::getTimeStr()
 | |
| {
 | |
|   char buf[sizeof(DATE_TIME_FORMAT) + 10] = {0};
 | |
|   time_t curTime = time(nullptr);
 | |
|   struct tm pTime;
 | |
|   localtime_r(&curTime, &pTime);
 | |
|   string timeStr;
 | |
| 
 | |
|   snprintf(buf, sizeof(buf), DATE_TIME_FORMAT, pTime.tm_year + 1900, pTime.tm_mon + 1, pTime.tm_mday,
 | |
|            pTime.tm_hour, pTime.tm_min, pTime.tm_sec);
 | |
| 
 | |
|   timeStr = buf;
 | |
| 
 | |
|   return timeStr;
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Convert int value to string
 | |
|  * PARAMETERS:
 | |
|  *    val - value
 | |
|  * RETURN:
 | |
|  *    string
 | |
|  ******************************************************************************/
 | |
| /* static */
 | |
| const std::string Convertor::int2Str(int val)
 | |
| {
 | |
|   char buf[12];
 | |
|   string myStr;
 | |
| 
 | |
|   snprintf(buf, sizeof(buf), "%d", val);
 | |
|   myStr = buf;
 | |
| 
 | |
|   return myStr;
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Convert a numeric string to a decimal long long, given the specified
 | |
|  *    scale.  errno should be checked upon return from this function to see
 | |
|  *    if it is set to ERANGE, meaning the value was out of range.
 | |
|  * PARAMETERS:
 | |
|  *    field       - string to be interpreted
 | |
|  *    fieldLength - length of "field"
 | |
|  *    scale       - decimal scale value to be used to parse "field"
 | |
|  * RETURN:
 | |
|  *    converted long long for specified "field"
 | |
|  ******************************************************************************/
 | |
| /* static */
 | |
| 
 | |
| long long Convertor::convertDecimalString(const char* field, int /*fieldLength*/, int scale)
 | |
| {
 | |
|   long double dval = strtold(field, nullptr);
 | |
|   long long ret = 0;
 | |
| 
 | |
|   // move scale digits to the left of the decimal point
 | |
|   for (int i = 0; i < scale; i++)
 | |
|     dval *= 10;
 | |
| 
 | |
|   // range check against int64
 | |
|   if (dval > LLONG_MAX)
 | |
|   {
 | |
|     errno = ERANGE;
 | |
|     return LLONG_MAX;
 | |
|   }
 | |
|   if (dval < LLONG_MIN)
 | |
|   {
 | |
|     errno = ERANGE;
 | |
|     return LLONG_MIN;
 | |
|   }
 | |
|   errno = 0;
 | |
| 
 | |
|   ret = dval;
 | |
| 
 | |
|   // get the fractional part of what's left & round ret up or down.
 | |
|   dval -= ret;
 | |
|   if (dval >= 0.5 && ret < LLONG_MAX)
 | |
|     ++ret;
 | |
|   else if (dval <= -0.5 && ret > LLONG_MIN)
 | |
|     --ret;
 | |
|   return ret;
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Convert an oid to a filename (with partition and segment number
 | |
|  *    in the filepath.
 | |
|  * PARAMETERS:
 | |
|  *    fid - fid
 | |
|  *    fullFileName - file name
 | |
|  *    dbDirName - components of fullFileName
 | |
|  *    partition - partition number to be in used in filepath
 | |
|  *    segment   - segment number to be used in filename
 | |
|  * RETURN:
 | |
|  *    NO_ERROR if success, other if fail
 | |
|  ******************************************************************************/
 | |
| /* static */
 | |
| int Convertor::oid2FileName(FID fid, char* fullFileName, char dbDirName[][MAX_DB_DIR_NAME_SIZE],
 | |
|                             uint32_t partition, uint16_t segment)
 | |
| {
 | |
|   dmFilePathArgs_t args;
 | |
|   int rc;
 | |
| 
 | |
|   // Initialize.
 | |
|   char buff[6][MAX_DB_DIR_NAME_SIZE];
 | |
|   args.initialize(buff);
 | |
| 
 | |
|   RETURN_ON_WE_ERROR((rc = dmOid2FPath(fid, partition, segment, &args)), ERR_DM_CONVERT_OID);
 | |
|   sprintf(fullFileName, "%s/%s/%s/%s/%s/%s", args.pathPart[0].pName, args.pathPart[1].pName,
 | |
|           args.pathPart[2].pName, args.pathPart[3].pName, args.pathPart[4].pName, args.pathPart[5].pName);
 | |
| 
 | |
|   for (uint32_t i = 0; i < 6; ++i)
 | |
|   {
 | |
|     strcpy(dbDirName[i], args.pathPart[i].pName);
 | |
|   }
 | |
| 
 | |
|   //  std::cout << "OID: " << fid <<
 | |
|   //       " mapping to file: " << fullFileName <<std::endl;
 | |
| 
 | |
|   return NO_ERROR;
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Convert the given filename to an oid, segment and partition.
 | |
|  * PARAMETERS:
 | |
|  *    fullFileName INPUT  -- filename.
 | |
|  *    oid          OUTPUT -- oid number from the given filename.
 | |
|  *    partition    OUTPUT -- partition number from the given filename.
 | |
|  *    segment      OUTPUT -- segment number from the given fielname.
 | |
|  * RETURN:
 | |
|  *    NO_ERROR if success, other if fail.
 | |
|  ******************************************************************************/
 | |
| int Convertor::fileName2Oid(const std::string& fullFileName, uint32_t& oid, uint32_t& partition,
 | |
|                             uint32_t& segment)
 | |
| {
 | |
|   // ColumnStore file directory separator.
 | |
|   const char dirSep = '/';
 | |
|   // The number of the directories in the ColumnStore file name.
 | |
|   // Note: without `DBRoot` directory.
 | |
|   const uint32_t dirNamesMaxSize = 6;
 | |
|   const uint32_t fullFileNameLen = fullFileName.size();
 | |
| 
 | |
|   // Verify the given `fullFileName`.
 | |
|   if (!fullFileNameLen ||
 | |
|       // If not match `fnmatch` returns a result code which is not equal to
 | |
|       // zero.
 | |
|       // TODO: Probably we should use `std::regex_match`, but currently
 | |
|       // there are still parts of code which use legacy `char *`, so
 | |
|       // `std::regex_match` is not applicable there without creating
 | |
|       // additional `std::string` from `char *`.
 | |
|       fnmatch(CS_FULL_FILENAME_FORMAT, fullFileName.c_str(), 0))
 | |
|   {
 | |
|     return -1;
 | |
|   }
 | |
| 
 | |
|   std::vector<std::string> dirNames;
 | |
|   // We need exact 6 instances.
 | |
|   dirNames.reserve(6);
 | |
| 
 | |
|   uint32_t end = fullFileNameLen;
 | |
|   // Signed integer for `index` since it could be less than zero.
 | |
|   int32_t index = fullFileNameLen - 1;
 | |
| 
 | |
|   // Iterate over `fullFileName` starting from the end and split it by
 | |
|   // directory separator. Since we starting from the end we need just 6
 | |
|   // instances to match ColumnStore file name format specification.
 | |
|   while (index >= 0 && dirNames.size() < dirNamesMaxSize)
 | |
|   {
 | |
|     while (index >= 0 && fullFileName[index] != dirSep)
 | |
|     {
 | |
|       --index;
 | |
|     }
 | |
| 
 | |
|     // Begin is a `dirSep` index + 1.
 | |
|     uint32_t begin = index + 1;
 | |
|     const uint32_t dirNameLen = end - begin;
 | |
|     // We already checked the `fullFileName` format,
 | |
|     // but this check is only intended to make sure that this algo works
 | |
|     // correctly on any input, if something changes.
 | |
|     if (dirNameLen > 0 && dirNameLen < MAX_DB_DIR_NAME_SIZE)
 | |
|     {
 | |
|       dirNames.push_back(fullFileName.substr(begin, dirNameLen));
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       // Something wrong with filename, just return an error.
 | |
|       return -1;
 | |
|     }
 | |
|     // Set `end` to the last directory separator index.
 | |
|     end = index;
 | |
|     // Skip current directory separator.
 | |
|     --index;
 | |
|   }
 | |
| 
 | |
|   // Make sure we parsed 6 instances.
 | |
|   if (dirNames.size() != 6)
 | |
|   {
 | |
|     return -1;
 | |
|   }
 | |
| 
 | |
|   // Initialize `dmFilePathArgs_t` struct.
 | |
|   char buff[6][MAX_DB_DIR_NAME_SIZE];
 | |
| 
 | |
|   dmFilePathArgs_t args;
 | |
|   args.initialize(buff);
 | |
| 
 | |
|   // Populate `dmFilePathArgs_t` struct with the given names.
 | |
|   // Starting from the E directory.
 | |
|   for (uint32_t i = 0, dirCount = 5; i <= dirCount; ++i)
 | |
|   {
 | |
|     strcpy(args.pathPart[dirCount - i].pName, dirNames[i].c_str());
 | |
|   }
 | |
| 
 | |
|   // FIXME: Currently used ERR_DM_CONVERT_OID, should we introduce new error
 | |
|   // code?
 | |
|   RETURN_ON_WE_ERROR(dmFPath2Oid(args, oid, partition, segment), ERR_DM_CONVERT_OID);
 | |
| 
 | |
|   return NO_ERROR;
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Map specified errno to the associated error message string.
 | |
|  * PARAMETERS:
 | |
|  *    errNum  - errno to be converted
 | |
|  *    errString-(output) error message string associated with errNum
 | |
|  * RETURN:
 | |
|  *    none
 | |
|  ******************************************************************************/
 | |
| /* static */
 | |
| void Convertor::mapErrnoToString(int errNum, std::string& errString)
 | |
| {
 | |
|   char errnoMsgBuf[1024];
 | |
| #if STRERROR_R_CHAR_P
 | |
|   char* errnoMsg = strerror_r(errNum, errnoMsgBuf, sizeof(errnoMsgBuf));
 | |
| 
 | |
|   if (errnoMsg)
 | |
|     errString = errnoMsg;
 | |
|   else
 | |
|     errString.clear();
 | |
| 
 | |
| #else
 | |
|   int errnoMsg = strerror_r(errNum, errnoMsgBuf, sizeof(errnoMsgBuf));
 | |
| 
 | |
|   if (errnoMsg == 0)
 | |
|     errString = errnoMsgBuf;
 | |
|   else
 | |
|     errString.clear();
 | |
| 
 | |
| #endif
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Convert specified ColDataType to internal storage type (ColType).
 | |
|  * PARAMETERS:
 | |
|  *    dataType     - Interface data-type
 | |
|  *    internalType - Internal data-type used for storing
 | |
|  * RETURN:
 | |
|  *    none
 | |
|  ******************************************************************************/
 | |
| /* static */
 | |
| void Convertor::convertColType(CalpontSystemCatalog::ColDataType dataType, int colWidth,
 | |
|                                ColType& internalType, bool isToken)
 | |
| {
 | |
|   if (isToken)
 | |
|   {
 | |
|     internalType = WriteEngine::WR_TOKEN;
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   switch (dataType)
 | |
|   {
 | |
|     // Map BIT and TINYINT to WR_BYTE
 | |
|     case CalpontSystemCatalog::BIT:
 | |
|     case CalpontSystemCatalog::TINYINT: internalType = WriteEngine::WR_BYTE; break;
 | |
| 
 | |
|     // Map SMALLINT to WR_SHORT
 | |
|     case CalpontSystemCatalog::SMALLINT: internalType = WriteEngine::WR_SHORT; break;
 | |
| 
 | |
|     // Map MEDINT to WR_MEDINT
 | |
|     case CalpontSystemCatalog::MEDINT: internalType = WriteEngine::WR_MEDINT; break;
 | |
| 
 | |
|     // Map INT, and DATE to WR_INT
 | |
|     case CalpontSystemCatalog::INT:
 | |
|     case CalpontSystemCatalog::DATE: internalType = WriteEngine::WR_INT; break;
 | |
| 
 | |
|     // Map FLOAT and UFLOAT to WR_FLOAT
 | |
|     case CalpontSystemCatalog::FLOAT:
 | |
|     case CalpontSystemCatalog::UFLOAT: internalType = WriteEngine::WR_FLOAT; break;
 | |
| 
 | |
|     // Map BIGINT, DATETIME, TIMESTAMP, and TIME to WR_LONGLONG
 | |
|     case CalpontSystemCatalog::BIGINT:
 | |
|     case CalpontSystemCatalog::DATETIME:
 | |
|     case CalpontSystemCatalog::TIMESTAMP:
 | |
|     case CalpontSystemCatalog::TIME: internalType = WriteEngine::WR_LONGLONG; break;
 | |
| 
 | |
|     // Map DOUBLE and UDOUBLE to WR_DOUBLE
 | |
|     case CalpontSystemCatalog::DOUBLE:
 | |
|     case CalpontSystemCatalog::UDOUBLE: internalType = WriteEngine::WR_DOUBLE; break;
 | |
| 
 | |
|     // Map DECIMAL to applicable integer type
 | |
|     case CalpontSystemCatalog::DECIMAL:
 | |
|     case CalpontSystemCatalog::UDECIMAL:
 | |
|     {
 | |
|       switch (colWidth)
 | |
|       {
 | |
|         case 1: internalType = WriteEngine::WR_BYTE; break;
 | |
| 
 | |
|         case 2: internalType = WriteEngine::WR_SHORT; break;
 | |
| 
 | |
|         case 4: internalType = WriteEngine::WR_INT; break;
 | |
| 
 | |
|         case 8: internalType = WriteEngine::WR_LONGLONG; break;
 | |
| 
 | |
|         default: internalType = WriteEngine::WR_BINARY; break;
 | |
|       }
 | |
| 
 | |
|       break;
 | |
|     }
 | |
| 
 | |
|     // Map BLOB to WR_BLOB
 | |
|     case CalpontSystemCatalog::BLOB: internalType = WriteEngine::WR_BLOB; break;
 | |
| 
 | |
|     // Map TEXT to WR_TEXT
 | |
|     case CalpontSystemCatalog::TEXT: internalType = WriteEngine::WR_TEXT; break;
 | |
| 
 | |
|     // Map VARBINARY to WR_VARBINARY
 | |
|     case CalpontSystemCatalog::VARBINARY: internalType = WriteEngine::WR_VARBINARY; break;
 | |
| 
 | |
|     // Map CHAR, VARCHAR, and CLOB to WR_CHAR
 | |
|     case CalpontSystemCatalog::CHAR:
 | |
|     case CalpontSystemCatalog::VARCHAR:
 | |
|     case CalpontSystemCatalog::CLOB: internalType = WriteEngine::WR_CHAR; break;
 | |
| 
 | |
|     // Map UTINYINT to WR_UBYTE
 | |
|     case CalpontSystemCatalog::UTINYINT: internalType = WriteEngine::WR_UBYTE; break;
 | |
| 
 | |
|     // Map USMALLINT to WR_USHORT
 | |
|     case CalpontSystemCatalog::USMALLINT: internalType = WriteEngine::WR_USHORT; break;
 | |
| 
 | |
|     // Map UMEDINT to WR_UMEDINT
 | |
|     case CalpontSystemCatalog::UMEDINT: internalType = WriteEngine::WR_UMEDINT; break;
 | |
| 
 | |
|     // Map UINT to WR_UINT
 | |
|     case CalpontSystemCatalog::UINT: internalType = WriteEngine::WR_UINT; break;
 | |
| 
 | |
|     // Map UBIGINT, TIMESTAMP to WR_ULONGLONG
 | |
|     case CalpontSystemCatalog::UBIGINT: internalType = WriteEngine::WR_ULONGLONG; break;
 | |
| 
 | |
|     default: internalType = WriteEngine::WR_CHAR; break;
 | |
|   }
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Convert specified internal storage type (ColType) to ColDataType. Since
 | |
|  *    there is a one to many relationship, we choose the most general.
 | |
|  * PARAMETERS:
 | |
|  *    internalType - Internal data-type used for storing
 | |
|  *    dataType     - Interface data-type
 | |
|  * RETURN:
 | |
|  *    none
 | |
|  ******************************************************************************/
 | |
| /* static */
 | |
| void Convertor::convertWEColType(ColType internalType, CalpontSystemCatalog::ColDataType& dataType)
 | |
| {
 | |
|   switch (internalType)
 | |
|   {
 | |
|     // Map BIT and TINYINT to WR_BYTE
 | |
|     case WriteEngine::WR_BYTE: dataType = CalpontSystemCatalog::TINYINT; break;
 | |
| 
 | |
|     // Map SMALLINT to WR_SHORT
 | |
|     case WriteEngine::WR_SHORT: dataType = CalpontSystemCatalog::SMALLINT; break;
 | |
| 
 | |
|     // Map MEDINT to WR_MEDINT
 | |
|     case WriteEngine::WR_MEDINT: dataType = CalpontSystemCatalog::MEDINT; break;
 | |
| 
 | |
|     // Map INT, and DATE to WR_INT
 | |
|     case WriteEngine::WR_INT: dataType = CalpontSystemCatalog::INT; break;
 | |
| 
 | |
|     // Map FLOAT and UFLOAT to WR_FLOAT
 | |
|     case WriteEngine::WR_FLOAT: dataType = CalpontSystemCatalog::FLOAT; break;
 | |
| 
 | |
|     // Map BIGINT, DATETIME, TIME, and TIMESTAMP to WR_LONGLONG
 | |
|     case WriteEngine::WR_LONGLONG: dataType = CalpontSystemCatalog::BIGINT; break;
 | |
| 
 | |
|     // Map DOUBLE and UDOUBLE to WR_DOUBLE
 | |
|     case WriteEngine::WR_DOUBLE: dataType = CalpontSystemCatalog::DOUBLE; break;
 | |
| 
 | |
|     // Map BLOB to WR_BLOB
 | |
|     case WriteEngine::WR_BLOB: dataType = CalpontSystemCatalog::BLOB; break;
 | |
| 
 | |
|     // Map TEXT to WR_TEXT
 | |
|     case WriteEngine::WR_TEXT: dataType = CalpontSystemCatalog::TEXT; break;
 | |
| 
 | |
|     // Map VARBINARY to WR_VARBINARY
 | |
|     case WriteEngine::WR_VARBINARY: dataType = CalpontSystemCatalog::VARBINARY; break;
 | |
| 
 | |
|     // Map CHAR, VARCHAR, and CLOB to WR_CHAR
 | |
|     case WriteEngine::WR_CHAR: dataType = CalpontSystemCatalog::CHAR; break;
 | |
| 
 | |
|     // Map UTINYINT to WR_UBYTE
 | |
|     case WriteEngine::WR_UBYTE: dataType = CalpontSystemCatalog::UTINYINT; break;
 | |
| 
 | |
|     // Map USMALLINT to WR_USHORT
 | |
|     case WriteEngine::WR_USHORT: dataType = CalpontSystemCatalog::USMALLINT; break;
 | |
| 
 | |
|     // Map UMEDINT to WR_UMEDINT
 | |
|     case WriteEngine::WR_UMEDINT: dataType = CalpontSystemCatalog::UMEDINT; break;
 | |
| 
 | |
|     // Map UINT to WR_UINT
 | |
|     case WriteEngine::WR_UINT: dataType = CalpontSystemCatalog::UINT; break;
 | |
| 
 | |
|     // Map UBIGINT and TIMESTAMP to WR_ULONGLONG
 | |
|     case WriteEngine::WR_ULONGLONG: dataType = CalpontSystemCatalog::UBIGINT; break;
 | |
| 
 | |
|     default: dataType = CalpontSystemCatalog::CHAR; break;
 | |
|   }
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Convert curStruct from an interface-type struct to an internal-type
 | |
|  *    struct.  curStruct is handled as a ColStruct.
 | |
|  * PARAMETERS:
 | |
|  *    curStruct - column struct to be initialized
 | |
|  * RETURN:
 | |
|  *    none
 | |
|  ******************************************************************************/
 | |
| /* static */
 | |
| void Convertor::convertColType(ColStruct* curStruct)
 | |
| {
 | |
|   CalpontSystemCatalog::ColDataType dataType  // This will be updated later,
 | |
|       = CalpontSystemCatalog::CHAR;           // CHAR used only for initialization.
 | |
|   ColType* internalType = nullptr;
 | |
|   bool bTokenFlag = false;
 | |
|   int* width = nullptr;
 | |
| 
 | |
|   dataType = curStruct->colDataType;
 | |
|   internalType = &(curStruct->colType);
 | |
|   bTokenFlag = curStruct->tokenFlag;
 | |
|   width = &(curStruct->colWidth);
 | |
| 
 | |
|   switch (dataType)
 | |
|   {
 | |
|     // Map BIT and TINYINT to WR_BYTE
 | |
|     case CalpontSystemCatalog::BIT:
 | |
|     case CalpontSystemCatalog::TINYINT: *internalType = WriteEngine::WR_BYTE; break;
 | |
| 
 | |
|     // Map SMALLINT to WR_SHORT
 | |
|     case CalpontSystemCatalog::SMALLINT: *internalType = WriteEngine::WR_SHORT; break;
 | |
| 
 | |
|     // Map MEDINT to WR_MEDINT
 | |
|     case CalpontSystemCatalog::MEDINT: *internalType = WriteEngine::WR_MEDINT; break;
 | |
| 
 | |
|     // Map INT, and DATE to WR_INT
 | |
|     case CalpontSystemCatalog::INT:
 | |
|     case CalpontSystemCatalog::DATE: *internalType = WriteEngine::WR_INT; break;
 | |
| 
 | |
|     // Map FLOAT and UFLOAT to WR_FLOAT
 | |
|     case CalpontSystemCatalog::FLOAT:
 | |
|     case CalpontSystemCatalog::UFLOAT: *internalType = WriteEngine::WR_FLOAT; break;
 | |
| 
 | |
|     // Map BIGINT, DATETIME, TIME, and TIMESTAMP to WR_LONGLONG
 | |
|     case CalpontSystemCatalog::BIGINT:
 | |
|     case CalpontSystemCatalog::DATETIME:
 | |
|     case CalpontSystemCatalog::TIMESTAMP:
 | |
|     case CalpontSystemCatalog::TIME: *internalType = WriteEngine::WR_LONGLONG; break;
 | |
| 
 | |
|     // Map DOUBLE and UDOUBLE to WR_DOUBLE
 | |
|     case CalpontSystemCatalog::DOUBLE:
 | |
|     case CalpontSystemCatalog::UDOUBLE: *internalType = WriteEngine::WR_DOUBLE; break;
 | |
| 
 | |
|     // Map DECIMAL to applicable integer type
 | |
|     case CalpontSystemCatalog::DECIMAL:
 | |
|     case CalpontSystemCatalog::UDECIMAL:
 | |
|     {
 | |
|       switch (*width)
 | |
|       {
 | |
|         case 1: *internalType = WriteEngine::WR_BYTE; break;
 | |
| 
 | |
|         case 2: *internalType = WriteEngine::WR_SHORT; break;
 | |
| 
 | |
|         case 4: *internalType = WriteEngine::WR_INT; break;
 | |
| 
 | |
|         case 8: *internalType = WriteEngine::WR_LONGLONG; break;
 | |
| 
 | |
|         default: *internalType = WriteEngine::WR_BINARY; break;
 | |
|       }
 | |
| 
 | |
|       break;
 | |
|     }
 | |
| 
 | |
|     // Map BLOB to WR_BLOB
 | |
|     case CalpontSystemCatalog::BLOB: *internalType = WriteEngine::WR_BLOB; break;
 | |
| 
 | |
|     // Map TEXT to WR_TEXT
 | |
|     case CalpontSystemCatalog::TEXT: *internalType = WriteEngine::WR_TEXT; break;
 | |
| 
 | |
|     // Map VARBINARY to WR_VARBINARY
 | |
|     case CalpontSystemCatalog::VARBINARY: *internalType = WriteEngine::WR_VARBINARY; break;
 | |
| 
 | |
|     // Map CHAR, VARCHAR, and CLOB to WR_CHAR
 | |
|     case CalpontSystemCatalog::CHAR:
 | |
|     case CalpontSystemCatalog::VARCHAR:
 | |
|     case CalpontSystemCatalog::CLOB: *internalType = WriteEngine::WR_CHAR; break;
 | |
| 
 | |
|     // Map UTINYINT to WR_UBYTE
 | |
|     case CalpontSystemCatalog::UTINYINT: *internalType = WriteEngine::WR_UBYTE; break;
 | |
| 
 | |
|     // Map USMALLINT to WR_USHORT
 | |
|     case CalpontSystemCatalog::USMALLINT: *internalType = WriteEngine::WR_USHORT; break;
 | |
| 
 | |
|     // Map UMEDINT to WR_UMEDINT
 | |
|     case CalpontSystemCatalog::UMEDINT: *internalType = WriteEngine::WR_UMEDINT; break;
 | |
| 
 | |
|     // Map UINT to WR_UINT
 | |
|     case CalpontSystemCatalog::UINT: *internalType = WriteEngine::WR_UINT; break;
 | |
| 
 | |
|     // Map UBIGINT to WR_ULONGLONG
 | |
|     case CalpontSystemCatalog::UBIGINT: *internalType = WriteEngine::WR_ULONGLONG; break;
 | |
| 
 | |
|     default: *internalType = WriteEngine::WR_CHAR; break;
 | |
|   }
 | |
| 
 | |
|   if (bTokenFlag)  // token overwrite any other types
 | |
|     *internalType = WriteEngine::WR_TOKEN;
 | |
| 
 | |
|   // check whether width is in sync with the requirement
 | |
|   *width = getCorrectRowWidth(dataType, *width);
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  *    Get the correct width for a row
 | |
|  * PARAMETERS:
 | |
|  *    dataType - data type
 | |
|  *    width - data width in byte
 | |
|  * RETURN:
 | |
|  *    emptyVal - the value of empty row
 | |
|  ******************************************************************************/
 | |
| /* static */
 | |
| int Convertor::getCorrectRowWidth(CalpontSystemCatalog::ColDataType dataType, int width)
 | |
| {
 | |
|   int offset, newWidth = 4;
 | |
| 
 | |
|   switch (dataType)
 | |
|   {
 | |
|     case CalpontSystemCatalog::TINYINT:
 | |
|     case CalpontSystemCatalog::UTINYINT: newWidth = 1; break;
 | |
| 
 | |
|     case CalpontSystemCatalog::SMALLINT:
 | |
|     case CalpontSystemCatalog::USMALLINT: newWidth = 2; break;
 | |
| 
 | |
|     case CalpontSystemCatalog::MEDINT:
 | |
|     case CalpontSystemCatalog::INT:
 | |
|     case CalpontSystemCatalog::UMEDINT:
 | |
|     case CalpontSystemCatalog::UINT: newWidth = 4; break;
 | |
| 
 | |
|     case CalpontSystemCatalog::BIGINT:
 | |
|     case CalpontSystemCatalog::UBIGINT: newWidth = 8; break;
 | |
| 
 | |
|     case CalpontSystemCatalog::FLOAT:
 | |
|     case CalpontSystemCatalog::UFLOAT: newWidth = 4; break;
 | |
| 
 | |
|     case CalpontSystemCatalog::DOUBLE:
 | |
|     case CalpontSystemCatalog::UDOUBLE: newWidth = 8; break;
 | |
| 
 | |
|     case CalpontSystemCatalog::DECIMAL:
 | |
|     case CalpontSystemCatalog::UDECIMAL:
 | |
|       if (width == 1)
 | |
|         newWidth = 1;
 | |
|       else if (width == 2)
 | |
|         newWidth = 2;
 | |
|       else if (width <= 4)
 | |
|         newWidth = 4;
 | |
|       else if (width <= 8)
 | |
|         newWidth = 8;
 | |
|       else
 | |
|         newWidth = 16;
 | |
|       break;
 | |
| 
 | |
|     case CalpontSystemCatalog::DATE: newWidth = 4; break;
 | |
| 
 | |
|     case CalpontSystemCatalog::DATETIME:
 | |
|     case CalpontSystemCatalog::TIME:
 | |
|     case CalpontSystemCatalog::TIMESTAMP: newWidth = 8; break;
 | |
| 
 | |
|     case CalpontSystemCatalog::CHAR:
 | |
|     case CalpontSystemCatalog::VARCHAR:
 | |
|     case CalpontSystemCatalog::VARBINARY:  // treat same as varchar for now
 | |
|     default:
 | |
|       offset = (dataType == CalpontSystemCatalog::VARCHAR) ? -1 : 0;
 | |
|       newWidth = 1;
 | |
| 
 | |
|       if (width == (2 + offset))
 | |
|         newWidth = 2;
 | |
|       else if (width >= (3 + offset) && width <= (4 + offset))
 | |
|         newWidth = 4;
 | |
|       else if (width >= (5 + offset))
 | |
|         newWidth = 8;
 | |
| 
 | |
|       break;
 | |
|   }
 | |
| 
 | |
|   return newWidth;
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  * Converts an OID into a group if directories and a filename.
 | |
|  *
 | |
|  * This function takes a 32-bit Object ID (OID).  If DLen is 0, then the
 | |
|  * OID is converted into 3 hierarchical directory names and a filename.
 | |
|  * If DLen is >0 then the OID is converted into 5 hierarchical directory
 | |
|  * names and a filename (using the partition and segment as additional
 | |
|  * input into the filepath.  The actual location
 | |
|  * of the file is <DBRoot>/<DirA>/<DirB>/<DirC>/<FName>, or
 | |
|  * <DBRoot>/<DirA>/<DirB>/<DirC>/<DirD>/<part#>/<segFName>.  The <DBRoot>
 | |
|  * entry must be pre-pended by the calling application after calling
 | |
|  * this function.  The value for <DBRoot> is stored in the Columnstore.xml
 | |
|  * configuration file.
 | |
|  *
 | |
|  * PARAMETERS:
 | |
|  *    oid       INPUT -- The Object Id.
 | |
|  *    partition INPUT -- partition to be included in filepath.
 | |
|  *    segment   INPUT -- segment to be included in filepath.
 | |
|  *    dmFilePathArgs* INPUT/OUTPUT -- Points to a buffer structure
 | |
|  *
 | |
|  * RETURN:
 | |
|  *    return 0 if everything went OK.  -1 if an error occurred.  Two
 | |
|  *    kinds of errors are possible:
 | |
|  *
 | |
|  *        - a null pointer was passed in
 | |
|  *        - truncation occurred.
 | |
|  *
 | |
|  *    If a null buffer pointer is passed in, a return code
 | |
|  *    of -1 will be returned FOR THAT BUFFER.
 | |
|  *
 | |
|  *    Truncation can occur if the buffer length specified in
 | |
|  *    dmFilePathArgs is too small.
 | |
|  *
 | |
|  *    If a buffer's return code is not zero, the appropriate
 | |
|  *    return code in dmfilePathArgs can be examined.  If a
 | |
|  *    buffer's return code is be less than zero, the
 | |
|  *    corresponding buffer pointer was NULL.  If it is greater
 | |
|  *    or equal to the buffer's length argument, length is too small
 | |
|  ******************************************************************************/
 | |
| /*static*/
 | |
| int Convertor::dmOid2FPath(uint32_t oid, uint32_t partition, uint32_t segment, dmFilePathArgs_t* pArgs)
 | |
| {
 | |
|   int32_t retCodes[6];
 | |
|   // A, B, C directories.
 | |
|   for (uint32_t i = 0, shift = 24, mask = 0xff000000; i < 3; ++i, shift -= 8, mask >>= 8)
 | |
|   {
 | |
|     retCodes[i] = _doDir(pArgs->pathPart[i].pName, pArgs->pathPart[i].len, (uint32_t)((oid & mask) >> shift));
 | |
|   }
 | |
| 
 | |
|   // include partition and seg num in the file path if they are present
 | |
|   if (pArgs->pathPart[3].len > 0)
 | |
|   {
 | |
|     // D directory.
 | |
|     retCodes[3] = _doDir(pArgs->pathPart[3].pName, pArgs->pathPart[3].len, (uint32_t)(oid & 0x000000ff));
 | |
| 
 | |
|     // E directory - partition.
 | |
|     retCodes[4] = _doDir(pArgs->pathPart[4].pName, pArgs->pathPart[4].len, partition);
 | |
|     // File - segment.
 | |
|     retCodes[5] = _doFile(pArgs->pathPart[5].pName, pArgs->pathPart[5].len, segment);
 | |
| 
 | |
|     // D.rc < 0 || E.rc < 0
 | |
|     if ((retCodes[3] < 0) || (retCodes[4] < 0))
 | |
|       return -1;
 | |
| 
 | |
|     // D.rc >= A.len || E.rc >= A.len
 | |
|     if ((retCodes[3] >= pArgs->pathPart[0].len) || (retCodes[4] >= pArgs->pathPart[0].len))
 | |
|       return -1;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     retCodes[5] = _doFile(pArgs->pathPart[5].pName, pArgs->pathPart[5].len, (uint32_t)(oid & 0x000000ff));
 | |
|   }
 | |
| 
 | |
|   // A.rc < 0 || B.rc < 0 || C.rc < 0
 | |
|   // A.rc >= A.len || B.rc >= B.len || C.rc >= C.len
 | |
|   for (uint32_t i = 0; i < 3; ++i)
 | |
|   {
 | |
|     if ((retCodes[i] < 0) || (retCodes[i] >= pArgs->pathPart[i].len))
 | |
|       return -1;
 | |
|   }
 | |
|   // F.rc < 0 || F.rc >= F.len
 | |
|   if ((retCodes[5] < 0) || (retCodes[5] >= pArgs->pathPart[5].len))
 | |
|     return -1;
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| /*******************************************************************************
 | |
|  * DESCRIPTION:
 | |
|  * Converts populated `dmFilePathArgs_t` struct to an oid, partition,
 | |
|  * and segment.
 | |
|  *
 | |
|  * PARAMETERS:
 | |
|  *    pArgs     INPUT --  a const reference to `dmFilePathArgs_t` struct.
 | |
|  *    oid       OUTPUT -- oid for the given file name.
 | |
|  *    partition OUTPUT -- partition for the given file name.
 | |
|  *    segment   OUTPUT -- segment for the given filename.
 | |
|  *
 | |
|  * RETURN:
 | |
|  *    return 0 if everything went OK.  -1 if an error occurred.
 | |
|  ******************************************************************************/
 | |
| int32_t Convertor::dmFPath2Oid(const dmFilePathArgs_t& pArgs, uint32_t& oid, uint32_t& partition,
 | |
|                                uint32_t& segment)
 | |
| {
 | |
|   oid = 0;
 | |
|   // A, B, C, D - directories.
 | |
|   for (uint32_t i = 0, shift = 24; i < 4; ++i, shift -= 8)
 | |
|   {
 | |
|     uint32_t val = 0;
 | |
|     auto rc = _fromDir(pArgs.pathPart[i].pName, val);
 | |
|     if (rc == -1)
 | |
|       return rc;
 | |
| 
 | |
|     oid |= val << shift;
 | |
|   }
 | |
| 
 | |
|   // Partition.
 | |
|   auto rc = _fromDir(pArgs.pathPart[4].pName, partition);
 | |
|   if (rc == -1)
 | |
|     return rc;
 | |
| 
 | |
|   // Segment.
 | |
|   rc = _fromFile(pArgs.pathPart[5].pName, segment);
 | |
|   if (rc == -1)
 | |
|     return rc;
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| }  // namespace WriteEngine
 |