1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-07 03:22:57 +03:00

MCOL-3536 collation

This commit is contained in:
David Hall
2020-05-26 12:42:11 -05:00
parent 11ba12f6ea
commit 06e50e0926
47 changed files with 516 additions and 535 deletions

View File

@@ -22,6 +22,10 @@
*
****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <string>
//#define NDEBUG
#include <cassert>
@@ -180,20 +184,21 @@ inline uint64_t simple_case_cmp(Row& row,
case execplan::CalpontSystemCatalog::VARCHAR:
{
const string& ev = parm[n]->data()->getStrVal(row, isNull);
if (isNull)
break;
CHARSET_INFO* cs = parm[n]->data()->resultType().getCharset();
for (i = 1; i <= whereCount; i++)
{
//BUG 5362
if (utf8::idb_strcoll(ev.c_str(), parm[i]->data()->getStrVal(row, isNull).c_str()) == 0 && !isNull)
const string& p1 = parm[i]->data()->getStrVal(row, isNull);
if (isNull)
break;
if (cs->strnncoll(ev.c_str(), ev.length(), p1.c_str(), p1.length()) == 0)
{
foundIt = true;
break;
}
else
isNull = false;
}
break;

View File

@@ -49,7 +49,7 @@ namespace funcexp
CalpontSystemCatalog::ColType Func_char_length::operationType( FunctionParm& fp, CalpontSystemCatalog::ColType& resultType )
{
return resultType;
return fp[0]->data()->resultType();
}
int64_t Func_char_length::getIntVal(rowgroup::Row& row,
@@ -86,8 +86,7 @@ int64_t Func_char_length::getIntVal(rowgroup::Row& row,
return 0;
const char* b = tstr.c_str();
const char* e = tstr.c_str() + tstr.length();
const CHARSET_INFO* cs = get_charset(parm[0]->data()->resultType().charsetNumber, MYF(MY_WME));
return (int64_t)cs->numchars(b, e);
return (int64_t)parm[0]->data()->resultType().getCharset()->numchars(b, e);
}
case execplan::CalpontSystemCatalog::DATE:

View File

@@ -22,6 +22,10 @@
*
****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <cstdlib>
#include <string>
#include <sstream>
@@ -148,6 +152,7 @@ std::string Func_greatest::getStrVal(rowgroup::Row& row,
execplan::CalpontSystemCatalog::ColType& op_ct)
{
const string& str = fp[0]->data()->getStrVal(row, isNull);
CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset();
string greatestStr = str;
@@ -155,12 +160,10 @@ std::string Func_greatest::getStrVal(rowgroup::Row& row,
{
const string& str1 = fp[i]->data()->getStrVal(row, isNull);
int tmp = utf8::idb_strcoll(greatestStr.c_str(), str1.c_str());
if ( tmp < 0 )
// if ( greatestStr < str1 )
if (cs->strnncoll(greatestStr.c_str(), greatestStr.length(), str1.c_str(), str1.length()) < 0)
{
greatestStr = str1;
}
}
return greatestStr;

View File

@@ -22,6 +22,10 @@
*
****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <cstdlib>
#include <string>
using namespace std;
@@ -52,11 +56,6 @@ inline bool numericEQ(result_t op1, result_t op2)
return op1 == op2;
}
inline bool strEQ(string op1, string op2)
{
return utf8::idb_strcoll(op1.c_str(), op2.c_str()) == 0;
}
inline bool getBoolForIn(rowgroup::Row& row,
funcexp::FunctionParm& pm,
bool& isNull,
@@ -273,15 +272,16 @@ inline bool getBoolForIn(rowgroup::Row& row,
case execplan::CalpontSystemCatalog::TEXT:
{
const string& val = pm[0]->data()->getStrVal(row, isNull);
if (isNull)
return false;
CHARSET_INFO* cs = pm[0]->data()->resultType().getCharset();
for (uint32_t i = 1; i < pm.size(); i++)
{
isNull = false;
if ( utf8::idb_strcoll(val.c_str(), pm[i]->data()->getStrVal(row, isNull).c_str()) == 0 && !isNull)
const string& str1 = pm[i]->data()->getStrVal(row, isNull);
if (cs->strnncoll(val.c_str(), val.length(), str1.c_str(), str1.length()) == 0 && !isNull)
return true;
if (isNull && isNotIn)

View File

@@ -20,6 +20,10 @@
*
*
****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <m_ctype.h>
#include <cstdlib>
#include <string>
@@ -42,37 +46,50 @@ CalpontSystemCatalog::ColType Func_instr::operationType( FunctionParm& fp, Calpo
return ct;
}
size_t Func_instr::in_str(const string& str, const string& substr, size_t start)
{
// convert both inputs to wide character strings
std::wstring wcstr = utf8::utf8_to_wstring(str);
std::wstring wcsubstr = utf8::utf8_to_wstring(substr);
if ((str.length() && !wcstr.length()) ||
(substr.length() && !wcsubstr.length()))
// this means one or both of the strings had conversion errors to wide character
return 0;
size_t pos = wcstr.find(wcsubstr, start - 1);
return (pos != string::npos ? pos + 1 : 0);
}
int64_t Func_instr::getIntVal(rowgroup::Row& row,
FunctionParm& parm,
bool& isNull,
CalpontSystemCatalog::ColType&)
CalpontSystemCatalog::ColType& colType)
{
uint64_t start = 1;
if (parm.size() == 3)
start = parm[2]->data()->getIntVal(row, isNull);
if (isNull || start == 0)
int64_t start = 0;
int64_t start0= 0;
my_match_t match;
const std::string& str = parm[0]->data()->getStrVal(row, isNull);
if (isNull)
return 0;
const char* s1 = str.c_str();
uint32_t l1 = (uint32_t)str.length();
const std::string& substr =parm[1]->data()->getStrVal(row, isNull);
if (isNull)
return 0;
//Bug 5110 : to support utf8 char type, we have to convert and search
return in_str(parm[0]->data()->getStrVal(row, isNull), parm[1]->data()->getStrVal(row, isNull), start);
const char* s2 = substr.c_str();
uint32_t l2 = (uint32_t)substr.length();
if (l2 < 1)
return start + 1;
CHARSET_INFO* cs = colType.getCharset();
if (parm.size() == 3)
{
start0 = start = parm[2]->data()->getIntVal(row, isNull) - 1;
if ((start < 0) || (start > l1))
return 0;
start = (int64_t)cs->charpos(s1, s1+l1, start); // adjust start for multi-byte
if (start + l2 > l1) // Substring is longer than str at pos.
return 0;
}
if (!cs->instr(s1+start, l1-start,
s2, l2,
&match, 1))
return 0;
return (int64_t)match.mb_len + start0 + 1;
}

View File

@@ -20,6 +20,10 @@
*
*
****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <m_ctype.h>
#include <string>
using namespace std;
@@ -56,31 +60,22 @@ CalpontSystemCatalog::ColType Func_lcase::operationType(FunctionParm& fp, Calpon
std::string Func_lcase::getStrVal(rowgroup::Row& row,
FunctionParm& fp,
bool& isNull,
execplan::CalpontSystemCatalog::ColType&)
execplan::CalpontSystemCatalog::ColType& colType)
{
// string str = fp[0]->data()->getStrVal(row, isNull);
// transform (str.begin(), str.end(), str.begin(), to_lower());
const string& tstr = fp[0]->data()->getStrVal(row, isNull);
if (isNull)
return "";
size_t strwclen = utf8::idb_mbstowcs(0, tstr.c_str(), 0) + 1;
wchar_t* wcbuf = new wchar_t[strwclen];
strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen);
wstring wstr(wcbuf, strwclen);
CHARSET_INFO* cs = colType.getCharset();
uint64_t inLen = tstr.length();
uint64_t bufLen= inLen * cs->casedn_multiply;
char* outBuf = new char[bufLen];
uint64_t outLen = cs->casedn(tstr.c_str(), inLen, outBuf, bufLen);
for (uint32_t i = 0; i < strwclen; i++)
wstr[i] = std::towlower(wstr[i]);
size_t strmblen = utf8::idb_wcstombs(0, wstr.c_str(), 0) + 1;
char* outbuf = new char[strmblen];
strmblen = utf8::idb_wcstombs(outbuf, wstr.c_str(), strmblen);
std::string ret(outbuf, strmblen);
delete [] outbuf;
delete [] wcbuf;
string ret = string(outBuf, outLen);
delete [] outBuf;
return ret;
}

View File

@@ -22,6 +22,10 @@
*
****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <cstdlib>
#include <string>
#include <sstream>
@@ -127,17 +131,16 @@ std::string Func_least::getStrVal(rowgroup::Row& row,
execplan::CalpontSystemCatalog::ColType& op_ct)
{
string leastStr = fp[0]->data()->getStrVal(row, isNull);
CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset();
for (uint32_t i = 1; i < fp.size(); i++)
{
const string& str1 = fp[i]->data()->getStrVal(row, isNull);
int tmp = utf8::idb_strcoll(leastStr.c_str(), str1.c_str());
if ( tmp > 0 )
// if ( leastStr > str1 )
if (cs->strnncoll(leastStr.c_str(), leastStr.length(), str1.c_str(), str1.length()) > 0)
{
leastStr = str1;
}
}
return leastStr;

View File

@@ -22,6 +22,11 @@
*
****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#undef LONGLONG_MIN
#include <my_sys.h>
#include <cstdlib>
#include <string>
#include <sstream>
@@ -363,6 +368,7 @@ string Func_nullif::getStrVal(rowgroup::Row& row,
CalpontSystemCatalog::ColType& op_ct)
{
string exp1 = parm[0]->data()->getStrVal(row, isNull);
CHARSET_INFO* cs = parm[0]->data()->resultType().getCharset();
if (isNull)
{
@@ -395,7 +401,7 @@ string Func_nullif::getStrVal(rowgroup::Row& row,
exp2 = exp2 + " 00:00:00";
}
if ( utf8::idb_strcoll(exp1.c_str(), exp2.c_str()) == 0 )
if (cs->strnncoll(exp1.c_str(), exp1.length(), exp2.c_str(), exp2.length()) == 0)
{
isNull = true;
return "";

View File

@@ -21,6 +21,10 @@
*
****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <cstdlib>
#include <string>
#include <sstream>
@@ -39,6 +43,10 @@ using namespace joblist;
#include "utils_utf8.h"
using namespace funcexp;
// Because including my_sys.h in a Columnstore header causes too many conflicts
struct charset_info_st;
typedef const struct charset_info_st CHARSET_INFO;
class to_lower
{
public:
@@ -64,10 +72,11 @@ int64_t Func_strcmp::getIntVal(rowgroup::Row& row,
bool& isNull,
execplan::CalpontSystemCatalog::ColType& op_ct)
{
CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset();
const string& str = fp[0]->data()->getStrVal(row, isNull);
const string& str1 = fp[1]->data()->getStrVal(row, isNull);
int ret = utf8::idb_strcoll(str.c_str(), str1.c_str());
int ret = cs->strnncoll(str.c_str(), str.length(), str1.c_str(), str1.length());
// mysql's strcmp returns only -1, 0, and 1
return (ret < 0 ? -1 : (ret > 0 ? 1 : 0));
}

View File

@@ -20,6 +20,10 @@
*
*
****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <m_ctype.h>
#include <string>
using namespace std;
@@ -55,31 +59,22 @@ CalpontSystemCatalog::ColType Func_ucase::operationType(FunctionParm& fp, Calpon
std::string Func_ucase::getStrVal(rowgroup::Row& row,
FunctionParm& fp,
bool& isNull,
execplan::CalpontSystemCatalog::ColType&)
execplan::CalpontSystemCatalog::ColType& colType)
{
// string str = fp[0]->data()->getStrVal(row, isNull);
// transform (str.begin(), str.end(), str.begin(), to_lower());
const string& tstr = fp[0]->data()->getStrVal(row, isNull);
if (isNull)
return "";
size_t strwclen = utf8::idb_mbstowcs(0, tstr.c_str(), 0) + 1;
wchar_t* wcbuf = new wchar_t[strwclen];
strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen);
wstring wstr(wcbuf, strwclen);
CHARSET_INFO* cs = colType.getCharset();
uint64_t inLen = tstr.length();
uint64_t bufLen= inLen * cs->caseup_multiply;
char* outBuf = new char[bufLen];
uint64_t outLen = cs->caseup(tstr.c_str(), inLen, outBuf, bufLen);
for (uint32_t i = 0; i < strwclen; i++)
wstr[i] = std::towupper(wstr[i]);
size_t strmblen = utf8::idb_wcstombs(0, wstr.c_str(), 0) + 1;
char* outbuf = new char[strmblen];
strmblen = utf8::idb_wcstombs(outbuf, wstr.c_str(), strmblen);
std::string ret(outbuf, strmblen);
delete [] outbuf;
delete [] wcbuf;
string ret = string(outBuf, outLen);
delete [] outBuf;
return ret;
}

View File

@@ -84,8 +84,6 @@ public:
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& resultType);
size_t in_str(const std::string& str, const std::string& substr, size_t start);
int64_t getIntVal(rowgroup::Row& row,
FunctionParm& fp,
bool& isNull,

View File

@@ -1,303 +0,0 @@
/* Copyright (C) 2014 InfiniDB, Inc.
* Copyright (C) 2016 MariaDB Corporation.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
// $Id$
#ifndef _UTILS_UTF8_H_
#define _UTILS_UTF8_H_
#include <string>
#if defined(_MSC_VER)
#include <malloc.h>
#include <windows.h>
#elif defined(__FreeBSD__)
//#include <cstdlib>
#else
#include <alloca.h>
#endif
#include <cstdlib>
#include <clocale>
#include "liboamcpp.h"
/** @file */
namespace funcexp
{
namespace utf8
{
extern bool JPcodePoint; // code point ordering (Japanese UTF) flag, used in idb_strcoll
const int MAX_UTF8_BYTES_PER_CHAR = 4;
// A global loc object so we don't construct one at every compare
extern std::locale loc;
// Is there a way to construct a global reference to a facet?
// const std::collate<char>& coll = std::use_facet<std::collate<char> >(loc);
// Infinidb version of setlocale. BUG 5362
//
// Reads the "SystemLang" setting through oam::Oam::getSystemConfig (falling
// back to "C" if that throws) and applies it with setlocale(LC_ALL, ...).
//
// When the requested locale is rejected, an INVALID_LOCALE alarm is raised and
// one error line is logged (only the first time), and the name falls back to
// "C". When it is accepted, the INVALID_LOCALE alarm is cleared.
//
// Side effects: forces LC_NUMERIC to "C" (BUG 2991), sets JPcodePoint when the
// locale name contains "ja_JP", and stores the matching std::locale in the
// global `loc` (MCOL-1559).
//
// Returns the locale name that ended up in effect.
inline
std::string idb_setlocale()
{
    // get and set locale language
    std::string systemLang("C");
    oam::Oam oam;
    static bool loggedMsg = false;

    try
    {
        oam.getSystemConfig("SystemLang", systemLang);
    }
    catch (...)
    {
        systemLang = "C";
    }

    char* pLoc = setlocale(LC_ALL, systemLang.c_str());

    if (pLoc == NULL)
    {
        // Fall back to "C" *before* the alarm/log work. That work can throw,
        // and a rejected name left in systemLang would make the std::locale
        // constructor below throw std::runtime_error.
        const std::string rejectedLang(systemLang);
        systemLang = "C";

        try
        {
            if (!loggedMsg)
            {
                //send alarm
                alarmmanager::ALARMManager alarmMgr;
                std::string alarmItem = "system";
                alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::INVALID_LOCALE, alarmmanager::SET);

                // Log one line
                logging::LoggingID lid(17); // ProcessManager -- probably the only one to find this for now
                logging::MessageLog ml(lid);
                logging::Message msg(1);
                logging::Message::Args args;
                args.add("Failed to set locale ");
                args.add(rejectedLang.c_str());
                args.add(": Setting to 'C'. Critical alarm generated");
                msg.format( args );
                ml.logErrorMessage(msg);
                loggedMsg = true;
            }
        }
        catch (...)
        {
            // Ignoring for time being.
        }
    }
    else
    {
        try
        {
            //send alarm
            alarmmanager::ALARMManager alarmMgr;
            std::string alarmItem = "system";
            alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::INVALID_LOCALE, alarmmanager::CLEAR);
        }
        catch (...)
        {
            // Ignoring for time being.
        }
    }

    printf ("Locale is : %s\n", systemLang.c_str() );

    //BUG 2991
    setlocale(LC_NUMERIC, "C");

    if (systemLang.find("ja_JP") != std::string::npos)
        JPcodePoint = true;

    // MCOL-1559 Save off the locale to save runtime cpus
    std::locale localloc(systemLang.c_str());
    loc = localloc;

    return systemLang;
}
// Infinidb flavour of strcoll(). BUG 5362
// Under ja_JP.utf8, strcoll() does not produce correct ordering, so when the
// JPcodePoint flag is set the comparison is done with strcmp() instead.
// Returns <0, 0 or >0 like the underlying C functions.
inline
int idb_strcoll(const char* str1, const char* str2)
{
    return JPcodePoint ? strcmp(str1, str2) : strcoll(str1, str2);
}
// MCOL-1559 Add a trimmed version of strcoll
// The intent here is to make no copy of the original strings and
// not modify them, so we can't use trim to deal with the spaces.
inline
int idb_strtrimcoll(const std::string& str1, const std::string& str2)
{
static const std::string whitespaces (" ");
const char* s1 = str1.c_str();
const char* s2 = str2.c_str();
// Set found1 to the last non-whitespace char in str1
std::size_t found1 = str1.find_last_not_of(whitespaces);
// Set found2 to the first whitespace char in str2
std::size_t found2 = str2.find_last_not_of(whitespaces);
// Are both strings empty or all whitespace?
if (found1 == std::string::npos && found2 == std::string::npos)
{
return 0; // they match
}
// If str1 is empty or all spaces
if (found1 == std::string::npos)
{
return -1;
}
// If str2 is empty or all spaces
if (found2 == std::string::npos)
{
return 1;
}
// found1 and found2 point to the character that is not a space.
// compare wants it to point to one past.
found1 += 1;
found2 += 1;
// If no trimming needs doing, then strcoll is faster
if (found1 == str1.size() && found2 == str2.size())
{
return idb_strcoll(s1, s2);
}
// Compare the (trimmed) strings
const std::collate<char>& coll = std::use_facet<std::collate<char> >(loc);
int rtn = coll.compare(s1, s1+found1, s2, s2+found2);
return rtn;
}
// BUG 5241
// Infinidb specific mbstowcs() covering both the Windows and unix builds.
// dest and max must leave room for the terminating NULL.
// Returns the number of wide characters produced, not counting the NULL;
// a failed conversion yields static_cast<size_t>(-1).
inline
size_t idb_mbstowcs(wchar_t* dest, const char* src, size_t max)
{
#ifndef _MSC_VER
    return mbstowcs(dest, src, max);
#else
    // 4th param (-1) means convert until the NULL char is hit;
    // with a 6th param (max) of 0 the required buffer size is returned.
    const int converted = MultiByteToWideChar(CP_UTF8, 0, src, -1, dest, (int)max);
    // drop the NULL from the count; a failure (0) wraps around to -1
    return static_cast<size_t>(converted) - 1;
#endif
}
// BUG 5241
// Infinidb specific wcstombs() covering both the Windows and unix builds.
// dest and max must leave room for the terminating NULL.
// Returns the number of bytes produced, not counting the NULL;
// a failed conversion yields static_cast<size_t>(-1).
inline
size_t idb_wcstombs(char* dest, const wchar_t* src, size_t max)
{
#ifndef _MSC_VER
    return wcstombs(dest, src, max);
#else
    // 4th param (-1) means convert until the NULL char is hit;
    // with a 6th param (max) of 0 the required buffer size is returned.
    const int converted = WideCharToMultiByte( CP_UTF8, 0, src, -1, dest, (int)max, NULL, NULL);
    // drop the NULL from the count; a failure (0) wraps around to -1
    return static_cast<size_t>(converted) - 1;
#endif
}
// Convert a UTF-8 string to a wstring.
// Returns an empty wstring when the conversion fails (bad characters, which
// may happen if the locale is wrong).
inline
std::wstring utf8_to_wstring (const std::string& str)
{
    // Convert straight into the result's own storage so no raw new[]/delete[]
    // buffer can leak. A multibyte string never yields more wide characters
    // than it has bytes; the +1 leaves room for the terminating NULL
    // (and keeps windows happy).
    std::wstring ret(str.length() + 1, L'\0');

    size_t strwclen = idb_mbstowcs(&ret[0], str.c_str(), ret.size());

    // if result is -1 it means bad characters which may happen if locale is wrong.
    // return an empty string
    if ( strwclen == static_cast<size_t>(-1) )
        strwclen = 0;

    // Drop the terminator and any unused tail.
    ret.resize(strwclen);
    return ret;
}
// Convert a wstring to a UTF-8 string.
// Returns an empty string when the conversion fails (bad characters, which
// may happen if the locale is wrong).
inline
std::string wstring_to_utf8 (const std::wstring& str)
{
    // Convert straight into the result's own storage so no raw new[]/delete[]
    // buffer can leak. Each wide character is budgeted
    // MAX_UTF8_BYTES_PER_CHAR bytes; the +1 leaves room for the terminating
    // NULL (and keeps windows happy).
    std::string ret(str.length() * MAX_UTF8_BYTES_PER_CHAR + 1, '\0');

    size_t strmblen = idb_wcstombs(&ret[0], str.c_str(), ret.size());

    // if result is -1 it means bad characters which may happen if locale is wrong.
    // return an empty string
    if ( strmblen == static_cast<size_t>(-1) )
        strmblen = 0;

    // Drop the terminator and any unused tail.
    ret.resize(strmblen);
    return ret;
}
// Given the first `length` bytes of a UTF-8 buffer, report how many trailing
// bytes belong to a multi-byte character that has been cut off, i.e. how many
// bytes must be dropped so the buffer ends on a character boundary.
//
// Returns 0 when the buffer already ends cleanly (or is empty/NULL), otherwise
// 1, 2 or 3. Only the last three bytes are ever inspected, and each look-back
// is bounds-checked so buffers shorter than three bytes are handled too.
inline
uint8_t utf8_truncate_point(const char* input, size_t length)
{
    if (input == NULL || length == 0)
    {
        return 0;
    }

    const unsigned char* end = (const unsigned char*)(input) + length;
    const unsigned char last = end[-1];

    // Plain single-byte character at the end: nothing to trim.
    if (!(last & 0x80))
    {
        return 0;
    }

    // Last byte is the first byte of a new multi-byte sequence.
    if (last & 0x40)
    {
        return 1;
    }

    // Last byte is a continuation byte. A 3 (or 4) byte lead right before it
    // means the sequence is still missing bytes.
    if (length >= 2 && (end[-2] & 0xe0) == 0xe0)
    {
        return 2;
    }

    // A 4 byte lead two bytes back: only three of its four bytes are present.
    if (length >= 3 && (end[-3] & 0xf0) == 0xf0)
    {
        return 3;
    }

    return 0;
}
} //namespace utf8
} //namespace funcexp
#endif

View File

@@ -24,6 +24,10 @@
* is the primary class.
*/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <unistd.h>
#include <sstream>
#include <stdexcept>
@@ -384,36 +388,16 @@ inline void RowAggregation::updateFloatMinMax(float val1, float val2, int64_t co
fRow.setFloatField(val1, col);
}
#define STRCOLL_ENH__
void RowAggregation::updateStringMinMax(string val1, string val2, int64_t col, int func)
{
if (isNull(fRowGroupOut, fRow, col))
CHARSET_INFO* cs = fRowGroupIn.getCharset(col);
int tmp = cs->strnncoll(val1.c_str(), val1.length(), val2.c_str(), val2.length());
if ((tmp < 0 && func == rowgroup::ROWAGG_MIN) ||
(tmp > 0 && func == rowgroup::ROWAGG_MAX))
{
fRow.setStringField(val1, col);
}
#ifdef STRCOLL_ENH__
else
{
int tmp = utf8::idb_strcoll(val1.c_str(), val2.c_str());
if ((tmp < 0 && func == rowgroup::ROWAGG_MIN) ||
(tmp > 0 && func == rowgroup::ROWAGG_MAX))
{
fRow.setStringField(val1, col);
}
}
#else
else if (minMax(val1, val2, func))
{
fRow.setStringField(val1, col);
}
#endif
}
//------------------------------------------------------------------------------

View File

@@ -53,6 +53,9 @@
#include "mcsv1_udaf.h"
#include "constantcolumn.h"
// Because including my_sys.h in a Columnstore header causes too many conflicts
struct charset_info_st;
typedef const struct charset_info_st CHARSET_INFO;
// To do: move code that depends on joblist to a proper subsystem.
namespace joblist
{
@@ -706,7 +709,7 @@ protected:
// We need a separate copy for each thread.
mcsv1sdk::mcsv1Context fRGContext;
// These are handy for testing the actual type of static_any for UDAF
static const static_any::any& charTypeId;
static const static_any::any& scharTypeId;

View File

@@ -505,8 +505,8 @@ Row::Row() : data(NULL), strings(NULL), userDataStore(NULL) { }
Row::Row(const Row& r) : columnCount(r.columnCount), baseRid(r.baseRid),
oldOffsets(r.oldOffsets), stOffsets(r.stOffsets),
offsets(r.offsets), colWidths(r.colWidths), types(r.types), data(r.data),
scale(r.scale), precision(r.precision), strings(r.strings),
offsets(r.offsets), colWidths(r.colWidths), types(r.types), charsetNumbers(r.charsetNumbers),
data(r.data), scale(r.scale), precision(r.precision), strings(r.strings),
useStringTable(r.useStringTable), hasLongStringField(r.hasLongStringField),
sTableThreshold(r.sTableThreshold), forceInline(r.forceInline), userDataStore(NULL)
{ }
@@ -522,6 +522,7 @@ Row& Row::operator=(const Row& r)
offsets = r.offsets;
colWidths = r.colWidths;
types = r.types;
charsetNumbers = r.charsetNumbers;
data = r.data;
scale = r.scale;
precision = r.precision;
@@ -1006,6 +1007,7 @@ RowGroup::RowGroup(uint32_t colCount,
const vector<uint32_t>& roids,
const vector<uint32_t>& tkeys,
const vector<CalpontSystemCatalog::ColDataType>& colTypes,
const vector<uint32_t>& csNumbers,
const vector<uint32_t>& cscale,
const vector<uint32_t>& cprecision,
uint32_t stringTableThreshold,
@@ -1013,7 +1015,7 @@ RowGroup::RowGroup(uint32_t colCount,
const vector<bool>& forceInlineData
) :
columnCount(colCount), data(NULL), oldOffsets(positions), oids(roids), keys(tkeys),
types(colTypes), scale(cscale), precision(cprecision), rgData(NULL), strings(NULL),
types(colTypes), charsetNumbers(csNumbers), scale(cscale), precision(cprecision), rgData(NULL), strings(NULL),
sTableThreshold(stringTableThreshold)
{
uint32_t i;
@@ -1047,12 +1049,16 @@ RowGroup::RowGroup(uint32_t colCount,
useStringTable = (stringTable && hasLongStringField);
offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]);
// Set all the charsets to NULL for jit initialization.
charsets.insert(charsets.begin(), charsetNumbers.size(), NULL);
}
RowGroup::RowGroup(const RowGroup& r) :
columnCount(r.columnCount), data(r.data), oldOffsets(r.oldOffsets),
stOffsets(r.stOffsets), colWidths(r.colWidths),
oids(r.oids), keys(r.keys), types(r.types), scale(r.scale), precision(r.precision),
oids(r.oids), keys(r.keys), types(r.types), charsetNumbers(r.charsetNumbers),
charsets(r.charsets), scale(r.scale), precision(r.precision),
rgData(r.rgData), strings(r.strings), useStringTable(r.useStringTable),
hasLongStringField(r.hasLongStringField), sTableThreshold(r.sTableThreshold),
forceInline(r.forceInline)
@@ -1076,6 +1082,8 @@ RowGroup& RowGroup::operator=(const RowGroup& r)
oids = r.oids;
keys = r.keys;
types = r.types;
charsetNumbers = r.charsetNumbers;
charsets = r.charsets;
data = r.data;
scale = r.scale;
precision = r.precision;
@@ -1120,6 +1128,7 @@ void RowGroup::serialize(ByteStream& bs) const
serializeInlineVector<uint32_t>(bs, oids);
serializeInlineVector<uint32_t>(bs, keys);
serializeInlineVector<CalpontSystemCatalog::ColDataType>(bs, types);
serializeInlineVector<uint32_t>(bs, charsetNumbers);
serializeInlineVector<uint32_t>(bs, scale);
serializeInlineVector<uint32_t>(bs, precision);
bs << (uint8_t) useStringTable;
@@ -1139,6 +1148,7 @@ void RowGroup::deserialize(ByteStream& bs)
deserializeInlineVector<uint32_t>(bs, oids);
deserializeInlineVector<uint32_t>(bs, keys);
deserializeInlineVector<CalpontSystemCatalog::ColDataType>(bs, types);
deserializeInlineVector<uint32_t>(bs, charsetNumbers);
deserializeInlineVector<uint32_t>(bs, scale);
deserializeInlineVector<uint32_t>(bs, precision);
bs >> tmp8;
@@ -1156,6 +1166,10 @@ void RowGroup::deserialize(ByteStream& bs)
offsets = &stOffsets[0];
else if (!useStringTable && !oldOffsets.empty())
offsets = &oldOffsets[0];
// Set all the charsets to NULL for jit initialization.
charsets.insert(charsets.begin(), charsetNumbers.size(), NULL);
}
void RowGroup::serializeRGData(ByteStream& bs) const
@@ -1467,6 +1481,15 @@ void RowGroup::addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList&
}
}
// Returns the CHARSET_INFO for column `col`. The pointer is looked up with
// get_charset() from the column's charset number the first time it is asked
// for, then cached in `charsets` for subsequent calls.
CHARSET_INFO* RowGroup::getCharset(uint32_t col)
{
    CHARSET_INFO*& cached = charsets[col];

    if (!cached)
        cached = get_charset(charsetNumbers[col], MYF(MY_WME));

    return cached;
}
void RowGroup::setDBRoot(uint32_t dbroot)
{
*((uint32_t*) &data[dbRootOffset]) = dbroot;

View File

@@ -58,6 +58,11 @@
#include "../winport/winport.h"
// Because including my_sys.h in a Columnstore header causes too many conflicts
struct charset_info_st;
typedef const struct charset_info_st CHARSET_INFO;
// Workaround for my_global.h #define of isnan(X) causing a std::std namespace
namespace rowgroup
@@ -319,6 +324,7 @@ public:
inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
inline execplan::CalpontSystemCatalog::ColDataType* getColTypes();
inline const execplan::CalpontSystemCatalog::ColDataType* getColTypes() const;
inline uint32_t getCharsetNumber(uint32_t colIndex) const;
// this returns true if the type is not CHAR or VARCHAR
inline bool isCharType(uint32_t colIndex) const;
@@ -461,6 +467,7 @@ private:
uint32_t* offsets;
uint32_t* colWidths;
execplan::CalpontSystemCatalog::ColDataType* types;
uint32_t* charsetNumbers;
uint8_t* data;
uint32_t* scale;
uint32_t* precision;
@@ -569,6 +576,11 @@ inline const execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes() con
return types;
}
inline uint32_t Row::getCharsetNumber(uint32_t col) const
{
return charsetNumbers[col];
}
inline bool Row::isCharType(uint32_t colIndex) const
{
return execplan::isCharType(types[colIndex]);
@@ -1268,6 +1280,7 @@ public:
@param coids An array of oids for each column.
@param tkeys An array of unique id for each column.
@param colTypes An array of COLTYPEs for each column.
@param charsetNumbers An array of the lookup numbers for the charset/collation objects.
@param scale An array specifying the scale of DECIMAL types (0 for non-decimal)
@param precision An array specifying the precision of DECIMAL types (0 for non-decimal)
*/
@@ -1277,6 +1290,7 @@ public:
const std::vector<uint32_t>& cOids,
const std::vector<uint32_t>& tkeys,
const std::vector<execplan::CalpontSystemCatalog::ColDataType>& colTypes,
const std::vector<uint32_t>& charsetNumbers,
const std::vector<uint32_t>& scale,
const std::vector<uint32_t>& precision,
uint32_t stringTableThreshold,
@@ -1284,7 +1298,7 @@ public:
const std::vector<bool>& forceInlineData = std::vector<bool>()
);
/** @brief The copiers. It copies metadata, not the row data */
/** @brief The copiers. It copies metadata, not the row data */
RowGroup(const RowGroup&);
/** @brief Assignment operator. It copies metadata, not the row data */
@@ -1338,6 +1352,8 @@ public:
inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes() const;
inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes();
inline const std::vector<uint32_t>& getCharsetNumbers() const;
inline uint32_t getCharsetNumber(uint32_t colIndex) const;
inline boost::shared_array<bool>& getForceInline();
static inline uint32_t getHeaderSize()
{
@@ -1397,6 +1413,8 @@ public:
uint16_t* blockNum);
inline void setStringStore(boost::shared_ptr<StringStore>);
CHARSET_INFO* getCharset(uint32_t col);
private:
uint32_t columnCount;
@@ -1413,8 +1431,11 @@ private:
// Used to map the projected column and rowgroup index
std::vector<uint32_t> keys;
std::vector<execplan::CalpontSystemCatalog::ColDataType> types;
// DECIMAL support. For non-decimal fields, the values are 0.
// For string collation
std::vector<uint32_t> charsetNumbers;
std::vector<CHARSET_INFO*> charsets;
// DECIMAL support. For non-decimal fields, the values are 0.
std::vector<uint32_t> scale;
std::vector<uint32_t> precision;
@@ -1547,6 +1568,7 @@ void RowGroup::initRow(Row* r, bool forceInlineData) const
{
r->colWidths = (uint32_t*) &colWidths[0];
r->types = (execplan::CalpontSystemCatalog::ColDataType*) & (types[0]);
r->charsetNumbers = (uint32_t*) & (charsetNumbers[0]);
r->scale = (uint32_t*) & (scale[0]);
r->precision = (uint32_t*) & (precision[0]);
}
@@ -1649,6 +1671,16 @@ inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getCo
return types;
}
inline const std::vector<uint32_t>& RowGroup::getCharsetNumbers() const
{
return charsetNumbers;
}
inline uint32_t RowGroup::getCharsetNumber(uint32_t colIndex) const
{
return charsetNumbers[colIndex];
}
inline const std::vector<uint32_t>& RowGroup::getScale() const
{
return scale;

View File

@@ -369,6 +369,11 @@ public:
EXPORT mcsv1Context& operator=(const mcsv1Context& rhs);
EXPORT mcsv1Context& copy(const mcsv1Context& rhs);
// Character collation support
EXPORT void setCharsetNumber(uint32_t csNum);
EXPORT uint32_t getCharsetNumber(); // Returns the unique ID for the language/collation
EXPORT CHARSET_INFO* getCharset();
private:
@@ -392,6 +397,7 @@ private:
int32_t fParamCount;
std::vector<uint32_t> paramKeys;
enum_mariadb_return_type mariadbReturnType;
uint32_t fCharsetNumber;
public:
// For use by the framework
@@ -416,6 +422,7 @@ public:
EXPORT void setParamCount(int32_t paramCount);
std::vector<uint32_t>* getParamKeys();
EXPORT void setMariaDBReturnType(enum_mariadb_return_type rt);
};
// Since aggregate functions can operate on any data type, we use the following structure
@@ -438,7 +445,9 @@ struct ColumnDatum
uint32_t scale; // If dataType is a DECIMAL type
uint32_t precision; // If dataType is a DECIMAL type
std::string alias; // Only filled in for init()
ColumnDatum() : dataType(execplan::CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1) {};
uint32_t charsetNumber; // For string collations
ColumnDatum() : dataType(execplan::CalpontSystemCatalog::UNDEFINED),
scale(0), precision(-1), charsetNumber(8) {};
};
// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or
@@ -658,7 +667,8 @@ inline mcsv1Context::mcsv1Context() :
fStartConstant(0),
fEndConstant(0),
func(NULL),
fParamCount(0)
fParamCount(0),
fCharsetNumber(8) // Latin1
{
}
@@ -683,6 +693,7 @@ inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs)
bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference
func = rhs.func;
fParamCount = rhs.fParamCount;
fCharsetNumber = rhs.fCharsetNumber;
return *this;
}
@@ -979,6 +990,16 @@ inline void mcsv1Context::setMariaDBReturnType(enum_mariadb_return_type rt)
mariadbReturnType = rt;
}
inline void mcsv1Context::setCharsetNumber(uint32_t csNum)
{
fCharsetNumber=csNum;
}
inline uint32_t mcsv1Context::getCharsetNumber()
{
return fCharsetNumber;
}
inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
return NOT_IMPLEMENTED;