1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-3536 Collation

This commit is contained in:
David Hall
2020-06-03 19:43:53 -05:00
parent 39a93ef753
commit 889094a23d
16 changed files with 211 additions and 151 deletions

View File

@ -6672,7 +6672,8 @@ int processLimitAndOffset(
} }
// We don't currently support limit with correlated subquery // We don't currently support limit with correlated subquery
if (gwi.subQuery && !gwi.correlatedTbNameVec.empty() && csep->hasOrderBy()) if (csep->limitNum() != (uint64_t) - 1 &&
gwi.subQuery && !gwi.correlatedTbNameVec.empty())
{ {
gwi.fatalParseError = true; gwi.fatalParseError = true;
gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_LIMIT_SUB); gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_LIMIT_SUB);

View File

@ -20,6 +20,9 @@
* *
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <cstdlib> #include <cstdlib>
#include <string> #include <string>
@ -56,37 +59,58 @@ int64_t Func_find_in_set::getIntVal(rowgroup::Row& row,
CalpontSystemCatalog::ColType& op_ct) CalpontSystemCatalog::ColType& op_ct)
{ {
const string& searchStr = parm[0]->data()->getStrVal(row, isNull); const string& searchStr = parm[0]->data()->getStrVal(row, isNull);
if (isNull) if (isNull)
return 0; return 0;
const string& setString = parm[1]->data()->getStrVal(row, isNull); const string& setString = parm[1]->data()->getStrVal(row, isNull);
if (isNull) if (isNull)
return 0; return 0;
if (searchStr.find(",") != string::npos) if (searchStr.find(",") != string::npos)
return 0; return 0;
string newSearchStr(searchStr.substr(0, strlen(searchStr.c_str()))); if (setString.length() > searchStr.length())
string newSetString(setString.substr(0, strlen(setString.c_str()))); return 0;
//tokenize the setStr with comma as seprator.
typedef boost::tokenizer<boost::char_separator<char> > tokenizer; CHARSET_INFO *cs= op_ct.getCharset();
boost::char_separator<char> sep( ",");
tokenizer tokens(newSetString, sep);
unsigned i = 0; my_wc_t wc= 0;
size_t pos = 0; const char *str_begin= setString.c_str();
const char *str_end= setString.c_str();
for (tokenizer::iterator tok_iter = tokens.begin(); tok_iter != tokens.end(); ++tok_iter) const char *real_end= str_end + setString.length();
const char *find_str= searchStr.c_str();
uint find_str_len= searchStr.length();
int position= 0;
static const char separator=',';
while (1)
{ {
pos = (*tok_iter).find(newSearchStr); int symbol_len;
i++; if ((symbol_len= cs->mb_wc(&wc, (uchar*) str_end,
(uchar*) real_end)) > 0)
if (( pos != string::npos) && (newSearchStr.length() == (*tok_iter).length())) {
return i; const char *substr_end= str_end + symbol_len;
bool is_last_item= (substr_end == real_end);
bool is_separator= (wc == (my_wc_t) separator);
if (is_separator || is_last_item)
{
position++;
if (is_last_item && !is_separator)
str_end= substr_end;
if (!cs->strnncoll(str_begin, (uint) (str_end - str_begin),
find_str, find_str_len))
return (longlong) position;
else
str_begin= substr_end;
}
str_end= substr_end;
}
else if (str_end - str_begin == 0 &&
find_str_len == 0 &&
wc == (my_wc_t) separator)
return (longlong) ++position;
else
return 0;
} }
return 0; return 0;
} }

View File

@ -53,22 +53,22 @@ bool boolVal(SPTP& parm, Row& row, const string& timeZone)
case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::VARCHAR:
ret = (atoi((char*)(parm->data()->getStrVal(timeZone).c_str())) != 0); ret = (atoi((char*)(parm->data()->getStrVal(timeZone).c_str())) != 0);
break;
case CalpontSystemCatalog::FLOAT: case CalpontSystemCatalog::FLOAT:
case CalpontSystemCatalog::UFLOAT: case CalpontSystemCatalog::UFLOAT:
ret = (parm->data()->getFloatVal(row, isNull) != 0); ret = (parm->data()->getFloatVal(row, isNull) != 0);
break;
case CalpontSystemCatalog::DOUBLE: case CalpontSystemCatalog::DOUBLE:
case CalpontSystemCatalog::UDOUBLE: case CalpontSystemCatalog::UDOUBLE:
ret = (parm->data()->getDoubleVal(row, isNull) != 0); ret = (parm->data()->getDoubleVal(row, isNull) != 0);
break;
case CalpontSystemCatalog::LONGDOUBLE: case CalpontSystemCatalog::LONGDOUBLE:
ret = (parm->data()->getLongDoubleVal(row, isNull) != 0); ret = (parm->data()->getLongDoubleVal(row, isNull) != 0);
break;
case CalpontSystemCatalog::DECIMAL: case CalpontSystemCatalog::DECIMAL:
case CalpontSystemCatalog::UDECIMAL: case CalpontSystemCatalog::UDECIMAL:
ret = (parm->data()->getDecimalVal(row, isNull).value != 0); ret = (parm->data()->getDecimalVal(row, isNull).value != 0);
break;
case CalpontSystemCatalog::BIGINT: case CalpontSystemCatalog::BIGINT:
case CalpontSystemCatalog::SMALLINT: case CalpontSystemCatalog::SMALLINT:
case CalpontSystemCatalog::MEDINT: case CalpontSystemCatalog::MEDINT:
@ -83,6 +83,7 @@ bool boolVal(SPTP& parm, Row& row, const string& timeZone)
case CalpontSystemCatalog::TIME: case CalpontSystemCatalog::TIME:
default: default:
ret = (parm->data()->getIntVal(row, isNull) != 0); ret = (parm->data()->getIntVal(row, isNull) != 0);
break;
} }
} }

View File

@ -20,6 +20,9 @@
* *
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <string> #include <string>
using namespace std; using namespace std;
@ -48,69 +51,61 @@ CalpontSystemCatalog::ColType Func_insert::operationType(FunctionParm& fp, Calpo
return fp[0]->data()->resultType(); return fp[0]->data()->resultType();
} }
string insertStr(const string& src, int pos, int len, const string& targ)
{
int64_t strLen = static_cast<int64_t>(src.length());
if ((pos <= 0) || ((pos - 1) >= strLen))
return src;
if ((len < 0) || (len > strLen))
len = strLen;
const char* srcptr = src.c_str();
advance(srcptr, pos - 1, srcptr + strLen);
// srcptr now pointing to where we need to insert targ string
uint32_t srcPos = srcptr - src.c_str();
uint32_t finPos = strLen;
const char* finptr = src.c_str();
if ((strLen - (pos - 1 + len)) >= 0)
{
advance(finptr, (pos - 1 + len), finptr + strLen);
// finptr now pointing to the end of the string to replace
finPos = finptr - src.c_str();
}
string out;
out.reserve(srcPos + targ.length() + strLen - finPos + 1);
out.append( src.c_str(), srcPos );
out.append( targ.c_str(), targ.length() );
out.append( src.c_str() + finPos, strLen - finPos );
return out;
}
std::string Func_insert::getStrVal(rowgroup::Row& row, std::string Func_insert::getStrVal(rowgroup::Row& row,
FunctionParm& fp, FunctionParm& fp,
bool& isNull, bool& isNull,
execplan::CalpontSystemCatalog::ColType&) execplan::CalpontSystemCatalog::ColType&)
{ {
string tstr; string src;
string tnewstr; string tnewstr;
stringValue(fp[0], row, isNull, tstr); int64_t start, length;
stringValue(fp[0], row, isNull, src);
if (isNull) if (isNull)
{
return ""; return "";
}
stringValue(fp[3], row, isNull, tnewstr); stringValue(fp[3], row, isNull, tnewstr);
if (isNull) if (isNull)
return ""; return "";
int64_t pos = fp[1]->data()->getIntVal(row, isNull); start = fp[1]->data()->getIntVal(row, isNull);
if (isNull) if (isNull)
return ""; return "";
int64_t len = fp[2]->data()->getIntVal(row, isNull); length = fp[2]->data()->getIntVal(row, isNull);
if (isNull) if (isNull)
return ""; return "";
return insertStr( tstr, pos, len, tnewstr ); start--; // Because SQL syntax is 1 based and we want 0 based.
CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset();
// binLen represents the number of bytes
int64_t binLen = static_cast<int64_t>(src.length());
const char* pos = src.c_str();
const char* end = pos + binLen;
// strLen is number of characters
int64_t strLen = cs->numchars(pos, end);
// Return the original string if start isn't within the string.
if ((start <= 1) || start >= strLen)
return src;
if ((length < 0) || (length > strLen))
length = strLen;
// Convert start and length from characters to bytes.
start = cs->charpos(pos, end, start);
length = cs->charpos(pos+start, end, length);
string out;
out.reserve(binLen - length + tnewstr.length() + 1);
out.append(src.c_str(), start);
out.append(tnewstr.c_str(), tnewstr.length());
out.append(src.c_str() + start + length, binLen - start - length);
return out;
} }

View File

@ -20,6 +20,9 @@
* *
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <string> #include <string>
using namespace std; using namespace std;

View File

@ -20,6 +20,9 @@
* *
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <cstdlib> #include <cstdlib>
#include <string> #include <string>

View File

@ -20,6 +20,10 @@
* *
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include "errorids.h" #include "errorids.h"
#include <string> #include <string>
using namespace std; using namespace std;

View File

@ -20,6 +20,9 @@
* *
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <string> #include <string>
using namespace std; using namespace std;

View File

@ -21,6 +21,10 @@
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <string> #include <string>
using namespace std; using namespace std;

View File

@ -20,6 +20,9 @@
* *
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <string> #include <string>
using namespace std; using namespace std;

View File

@ -20,6 +20,10 @@
* *
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include "errorids.h" #include "errorids.h"
#include <string> #include <string>
using namespace std; using namespace std;

View File

@ -20,6 +20,9 @@
* *
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <string> #include <string>
using namespace std; using namespace std;

View File

@ -20,6 +20,9 @@
* *
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <mariadb.h> #include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost #undef set_bits // mariadb.h defines set_bits, which is incompatible with boost

View File

@ -20,6 +20,9 @@
* *
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <string> #include <string>
using namespace std; using namespace std;

View File

@ -21,6 +21,9 @@
* *
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <string> #include <string>
using namespace std; using namespace std;

View File

@ -20,6 +20,9 @@
* *
* *
****************************************************************************/ ****************************************************************************/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <string> #include <string>
using namespace std; using namespace std;
@ -47,106 +50,106 @@ CalpontSystemCatalog::ColType Func_trim::operationType(FunctionParm& fp, Calpont
std::string Func_trim::getStrVal(rowgroup::Row& row, std::string Func_trim::getStrVal(rowgroup::Row& row,
FunctionParm& fp, FunctionParm& fp,
bool& isNull, bool& isNull,
execplan::CalpontSystemCatalog::ColType&) execplan::CalpontSystemCatalog::ColType& type)
{ {
// The number of characters (not bytes) in our input tstr. CHARSET_INFO* cs = type.getCharset();
// Not all of these are necessarily significant. We need to search for the
// NULL terminator to be sure.
size_t strwclen;
// this holds the number of characters (not bytes) in ourtrim tstr.
size_t trimwclen;
// The original string // The original string
const string& tstr = fp[0]->data()->getStrVal(row, isNull); const string& src = fp[0]->data()->getStrVal(row, isNull);
if (isNull)
return "";
if (src.empty() || src.length() == 0)
return src;
// binLen represents the number of bytes in src
size_t binLen = src.length();
const char* pos = src.c_str();
const char* end = pos + binLen;
// strLen = the number of characters in src
size_t strLen = cs->numchars(pos, end);
// The trim characters. // The trim characters.
const string& trim = (fp.size() > 1 ? fp[1]->data()->getStrVal(row, isNull) : " "); const string& trim = (fp.size() > 1 ? fp[1]->data()->getStrVal(row, isNull) : " ");
// binTLen represents the number of bytes in trim
size_t binTLen = trim.length();
const char* posT = trim.c_str();
// strTLen = the number of characters in trim
size_t strTLen = cs->numchars(posT, posT+binTLen);
if (strTLen == 0 || strTLen > strLen)
return src;
if (isNull) if (binTLen == 1)
return "";
if (tstr.empty() || tstr.length() == 0)
return tstr;
// Rather than calling the wideconvert functions with a null buffer to
// determine the size of buffer to allocate, we can be sure the wide
// char string won't be longer than:
strwclen = tstr.length(); // a guess to start with. This will be >= to the real count.
int bufsize = strwclen + 1;
// Convert the string to wide characters. Do all further work in wide characters
wchar_t* wcbuf = new wchar_t[bufsize];
strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen + 1);
// Bad char in mbc can return -1
if (strwclen == static_cast<size_t>(-1))
strwclen = 0;
// Convert the trim string to wide
trimwclen = trim.length(); // A guess to start.
int trimbufsize = trimwclen + 1;
wchar_t* wctrim = new wchar_t[trimbufsize];
size_t trimlen = utf8::idb_mbstowcs(wctrim, trim.c_str(), trimwclen + 1);
// Bad char in mbc can return -1
if (trimlen == static_cast<size_t>(-1))
trimlen = 0;
size_t trimCmpLen = trimlen * sizeof(wchar_t);
const wchar_t* oPtr = wcbuf; // To remember the start of the string
const wchar_t* aPtr = oPtr;
const wchar_t* aEnd = wcbuf + strwclen - 1;
size_t trimCnt = 0;
if (trimlen > 0)
{ {
if (trimlen == 1) // If the trim string is 1 byte, don't waste cpu for memcmp
// Trim leading
while (pos < end && *pos == *posT)
{ {
// If trim is a single char, then don't spend the overhead for memcmp. ++pos;
wchar_t chr = wctrim[0]; --binLen;
// remove leading
while (aPtr != aEnd && *aPtr == chr)
{
aPtr++;
++trimCnt;
}
// remove trailing
while (aEnd != aPtr && *aEnd == chr)
{
aEnd--;
++trimCnt;
}
} }
else // Trim trailing
while (end > pos && *end == *posT)
{ {
aEnd -= (trimlen - 1); // So we don't compare past the end of the string. --end;
--binLen;
// remove leading }
while (aPtr <= aEnd && !memcmp(aPtr, wctrim, trimCmpLen)) }
else if (!cs->use_mb())
{
// This is a one byte per char charset with multiple char trim.
// Trim leading
while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0)
{
pos += binTLen;
binLen -= binTLen;
}
// Trim trailing
while (end-binTLen >= pos && memcmp(end-binTLen,posT,binTLen) == 0)
{
end -= binTLen;
binLen -= binTLen;
}
}
else
{
// We're using a multi-byte charset
// Trim leading is easy
while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0)
{
pos += binTLen;
binLen -= binTLen;
}
// Trim trailing
// The problem is that the byte pattern at the end could
// match memcmp, but not be correct since the first byte compared
// may actually be a second or later byte from a previous char.
// We start at the beginning of the string and move forward
// one character at a time until we reach the end. Then we can
// safely compare.
while (end - binTLen >= pos)
{
const char* p = pos;
uint32 l;
while (p + binTLen < end)
{ {
aPtr += trimlen; if ((l = my_ismbchar(cs, p, end))) // returns the number of bytes in the leading char or zero if one byte
trimCnt += trimlen; p += l;
else
++p;
} }
if (p + binTLen == end && memcmp(p,posT,binTLen) == 0)
// remove trailing
while (aPtr <= aEnd && !memcmp(aEnd, wctrim, trimCmpLen))
{ {
aEnd -= trimlen; //BUG 5241 end -= binTLen;
trimCnt += trimlen; binLen -= binTLen;
}
else
{
break; // We've run out of places to look
} }
} }
} }
// Bug 5110 - error in allocating enough memory for utf8 chars
size_t aLen = strwclen - trimCnt;
wstring trimmed = wstring(aPtr, aLen);
// Turn back to a string // Turn back to a string
std::string ret(utf8::wstring_to_utf8(trimmed.c_str())); std::string ret(pos, binLen);
delete [] wctrim;
delete [] wcbuf;
return ret; return ret;
} }