1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-07 03:22:57 +03:00

MCOL-3536 Collation phase 2

This commit is contained in:
David Hall
2020-06-11 10:10:54 -05:00
parent 171e32a3ff
commit de125bac2b
4 changed files with 115 additions and 130 deletions

View File

@@ -6,3 +6,4 @@
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost #undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h> #include <my_sys.h>
#include <m_ctype.h> #include <m_ctype.h>
#include <myisampack.h>

View File

@@ -41,6 +41,7 @@ using namespace logging;
#include "dataconvert.h" #include "dataconvert.h"
using namespace dataconvert; using namespace dataconvert;
#include "collation.h"
namespace namespace
{ {

View File

@@ -38,39 +38,41 @@ using namespace rowgroup;
#include "errorids.h" #include "errorids.h"
using namespace logging; using namespace logging;
#include "collation.h"
namespace namespace
{ {
// buf must be at least 9 characters since given 64-bit input inline size_t getChar(int32_t num, char*& buf)
// we will convert at most 8 characters and then add the null
inline bool getChar( uint64_t value, char* buf )
{ {
uint32_t cur_offset = 0; // current index into buf char tmp[4];
int cur_bitpos = 56; // 8th octet in input val size_t numBytes = 0;
if (num & 0xFF000000L)
while ( cur_bitpos >= 0 )
{ {
if ( ( ( value >> cur_bitpos ) & 0xff ) != 0 ) mi_int4store(tmp, num);
numBytes = 4;
}
else if (num & 0xFF0000L)
{ {
buf[cur_offset++] = char( ( value >> cur_bitpos ) & 0xff ); mi_int3store(tmp, num);
numBytes = 3;
} }
else if (num & 0xFF00L)
cur_bitpos -= 8; {
mi_int2store(tmp, num);
numBytes = 2;
} }
buf[cur_offset] = '\0';
return true;
}
// see comment above regarding buf assumptions
inline bool getChar( int64_t value, char* buf )
{
if ( value < 0 )
return false;
else else
return getChar( (uint64_t) value, buf ); {
*((int8_t*)buf) = num;
++ buf;
return 1;
}
memcpy(buf, tmp, numBytes);
buf += numBytes;
return numBytes;
} }
} }
namespace funcexp namespace funcexp
@@ -86,40 +88,31 @@ string Func_char::getStrVal(Row& row,
bool& isNull, bool& isNull,
CalpontSystemCatalog::ColType& ct) CalpontSystemCatalog::ColType& ct)
{ {
const int BUF_SIZE = 9; // see comment above for size requirement const int BUF_SIZE = 4 * parm.size();
char buf[BUF_SIZE]; char buf[BUF_SIZE];
buf[0]= 0;
char* pBuf = buf;
CHARSET_INFO* cs = ct.getCharset();
int32_t value;
int32_t numBytes = 0;
for (uint32_t i = 0; i < parm.size(); ++i)
{
ReturnedColumn* rc = (ReturnedColumn*)parm[i]->data();
switch (ct.colDataType) switch (rc->resultType().colDataType)
{ {
case execplan::CalpontSystemCatalog::BIGINT: case execplan::CalpontSystemCatalog::BIGINT:
case execplan::CalpontSystemCatalog::INT: case execplan::CalpontSystemCatalog::INT:
case execplan::CalpontSystemCatalog::MEDINT: case execplan::CalpontSystemCatalog::MEDINT:
case execplan::CalpontSystemCatalog::TINYINT: case execplan::CalpontSystemCatalog::TINYINT:
case execplan::CalpontSystemCatalog::SMALLINT: case execplan::CalpontSystemCatalog::SMALLINT:
{
int64_t value = parm[0]->data()->getIntVal(row, isNull);
if ( !getChar(value, buf) )
{
isNull = true;
return "";
}
}
break;
case execplan::CalpontSystemCatalog::UBIGINT: case execplan::CalpontSystemCatalog::UBIGINT:
case execplan::CalpontSystemCatalog::UINT: case execplan::CalpontSystemCatalog::UINT:
case execplan::CalpontSystemCatalog::UMEDINT: case execplan::CalpontSystemCatalog::UMEDINT:
case execplan::CalpontSystemCatalog::UTINYINT: case execplan::CalpontSystemCatalog::UTINYINT:
case execplan::CalpontSystemCatalog::USMALLINT: case execplan::CalpontSystemCatalog::USMALLINT:
{ {
uint64_t value = parm[0]->data()->getUintVal(row, isNull); value = rc->getIntVal(row, isNull);
if ( !getChar(value, buf) )
{
isNull = true;
return "";
}
} }
break; break;
@@ -128,47 +121,26 @@ string Func_char::getStrVal(Row& row,
case execplan::CalpontSystemCatalog::TEXT: case execplan::CalpontSystemCatalog::TEXT:
case execplan::CalpontSystemCatalog::DOUBLE: case execplan::CalpontSystemCatalog::DOUBLE:
case execplan::CalpontSystemCatalog::UDOUBLE: case execplan::CalpontSystemCatalog::UDOUBLE:
{
double value = parm[0]->data()->getDoubleVal(row, isNull);
if ( !getChar((int64_t)value, buf) )
{
isNull = true;
return "";
}
}
break;
case execplan::CalpontSystemCatalog::FLOAT: case execplan::CalpontSystemCatalog::FLOAT:
case execplan::CalpontSystemCatalog::UFLOAT: case execplan::CalpontSystemCatalog::UFLOAT:
{ {
float value = parm[0]->data()->getFloatVal(row, isNull); double vf = std::round(rc->getDoubleVal(row, isNull));
value = (int32_t)vf;
if ( !getChar((int64_t)value, buf) )
{
isNull = true;
return "";
}
} }
break; break;
case execplan::CalpontSystemCatalog::DECIMAL: case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL: case execplan::CalpontSystemCatalog::UDECIMAL:
{ {
IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); IDB_Decimal d = rc->getDecimalVal(row, isNull);
double dscale = d.scale; double dscale = d.scale;
// get decimal and round up // get decimal and round up
int value = d.value / pow(10.0, dscale); value = d.value / pow(10.0, dscale);
int lefto = (d.value - value * pow(10.0, dscale)) / pow(10.0, dscale - 1); int lefto = (d.value - value * pow(10.0, dscale)) / pow(10.0, dscale - 1);
if ( lefto > 4 ) if ( lefto > 4 )
value++; value++;
if ( !getChar((int64_t)value, buf) )
{
isNull = true;
return "";
}
} }
break; break;
@@ -176,30 +148,41 @@ string Func_char::getStrVal(Row& row,
case execplan::CalpontSystemCatalog::DATETIME: case execplan::CalpontSystemCatalog::DATETIME:
case execplan::CalpontSystemCatalog::TIMESTAMP: case execplan::CalpontSystemCatalog::TIMESTAMP:
{ {
isNull = true; continue; // Dates are ignored
return "";
} }
break; break;
default: default:
{ {
std::ostringstream oss; value = 0;
oss << "char: datatype of " << execplan::colDataTypeToString(ct.colDataType); }
throw logging::IDBExcept(oss.str(), ERR_DATATYPE_NOT_SUPPORT);
} }
}
// Bug 5110 : Here the data in col is null. But there might have other
// non-null columns we processed before and we do not want entire value
// to become null. Therefore we set isNull flag to false.
if (isNull) if (isNull)
{ continue;
isNull = false;
return ""; numBytes += getChar(value, pBuf);
} }
return buf; /* Check whether we got a well-formed string */
MY_STRCOPY_STATUS status;
int32_t actualBytes = cs->well_formed_char_length(buf, buf + numBytes, numBytes, &status);
if (UNLIKELY(actualBytes < numBytes))
{
numBytes = actualBytes;
ostringstream os;
os << "Invalid character string for " << cs->csname << ": value = " << hex << buf + actualBytes;
logging::Message::Args args;
logging::Message message(9);
args.add(os.str());
logging::LoggingID logid(28); // Shows as PrimProc, which may not be correct in all cases
logging::Logger logger(logid.fSubsysID);
logger.logMessage(logging::LOG_TYPE_WARNING, message, logid);
// TODO: push warning to client
}
std::string ret(buf, numBytes);
return ret;
} }

View File

@@ -95,7 +95,7 @@ std::string Func_rpad::getStrVal(rowgroup::Row& row,
const char* posP = pad->c_str(); const char* posP = pad->c_str();
// plen = the number of characters in pad // plen = the number of characters in pad
size_t plen = cs->numchars(posP, posP+binPLen); size_t plen = cs->numchars(posP, posP+binPLen);
if (plen == 0 || plen > strLen) if (plen == 0)
return src; return src;
size_t byteCount = (padLength+1) * cs->mbmaxlen; // absolute maximum number of bytes size_t byteCount = (padLength+1) * cs->mbmaxlen; // absolute maximum number of bytes