You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-3536 collation
This commit is contained in:
@ -22,6 +22,10 @@
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include <mariadb.h>
|
||||
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||
#include <my_sys.h>
|
||||
|
||||
#include <string>
|
||||
//#define NDEBUG
|
||||
#include <cassert>
|
||||
@ -180,20 +184,21 @@ inline uint64_t simple_case_cmp(Row& row,
|
||||
case execplan::CalpontSystemCatalog::VARCHAR:
|
||||
{
|
||||
const string& ev = parm[n]->data()->getStrVal(row, isNull);
|
||||
|
||||
if (isNull)
|
||||
break;
|
||||
CHARSET_INFO* cs = parm[n]->data()->resultType().getCharset();
|
||||
|
||||
for (i = 1; i <= whereCount; i++)
|
||||
{
|
||||
//BUG 5362
|
||||
if (utf8::idb_strcoll(ev.c_str(), parm[i]->data()->getStrVal(row, isNull).c_str()) == 0 && !isNull)
|
||||
const string& p1 = parm[i]->data()->getStrVal(row, isNull);
|
||||
if (isNull)
|
||||
break;
|
||||
if (cs->strnncoll(ev.c_str(), ev.length(), p1.c_str(), p1.length()) == 0)
|
||||
{
|
||||
foundIt = true;
|
||||
break;
|
||||
}
|
||||
else
|
||||
isNull = false;
|
||||
}
|
||||
|
||||
break;
|
||||
|
@ -49,7 +49,7 @@ namespace funcexp
|
||||
|
||||
CalpontSystemCatalog::ColType Func_char_length::operationType( FunctionParm& fp, CalpontSystemCatalog::ColType& resultType )
|
||||
{
|
||||
return resultType;
|
||||
return fp[0]->data()->resultType();
|
||||
}
|
||||
|
||||
int64_t Func_char_length::getIntVal(rowgroup::Row& row,
|
||||
@ -86,8 +86,7 @@ int64_t Func_char_length::getIntVal(rowgroup::Row& row,
|
||||
return 0;
|
||||
const char* b = tstr.c_str();
|
||||
const char* e = tstr.c_str() + tstr.length();
|
||||
const CHARSET_INFO* cs = get_charset(parm[0]->data()->resultType().charsetNumber, MYF(MY_WME));
|
||||
return (int64_t)cs->numchars(b, e);
|
||||
return (int64_t)parm[0]->data()->resultType().getCharset()->numchars(b, e);
|
||||
}
|
||||
|
||||
case execplan::CalpontSystemCatalog::DATE:
|
||||
|
@ -22,6 +22,10 @@
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include <mariadb.h>
|
||||
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||
#include <my_sys.h>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
@ -148,6 +152,7 @@ std::string Func_greatest::getStrVal(rowgroup::Row& row,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct)
|
||||
{
|
||||
const string& str = fp[0]->data()->getStrVal(row, isNull);
|
||||
CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset();
|
||||
|
||||
string greatestStr = str;
|
||||
|
||||
@ -155,12 +160,10 @@ std::string Func_greatest::getStrVal(rowgroup::Row& row,
|
||||
{
|
||||
const string& str1 = fp[i]->data()->getStrVal(row, isNull);
|
||||
|
||||
int tmp = utf8::idb_strcoll(greatestStr.c_str(), str1.c_str());
|
||||
|
||||
if ( tmp < 0 )
|
||||
|
||||
// if ( greatestStr < str1 )
|
||||
if (cs->strnncoll(greatestStr.c_str(), greatestStr.length(), str1.c_str(), str1.length()) < 0)
|
||||
{
|
||||
greatestStr = str1;
|
||||
}
|
||||
}
|
||||
|
||||
return greatestStr;
|
||||
|
@ -22,6 +22,10 @@
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include <mariadb.h>
|
||||
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||
#include <my_sys.h>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
using namespace std;
|
||||
@ -52,11 +56,6 @@ inline bool numericEQ(result_t op1, result_t op2)
|
||||
return op1 == op2;
|
||||
}
|
||||
|
||||
inline bool strEQ(string op1, string op2)
|
||||
{
|
||||
return utf8::idb_strcoll(op1.c_str(), op2.c_str()) == 0;
|
||||
}
|
||||
|
||||
inline bool getBoolForIn(rowgroup::Row& row,
|
||||
funcexp::FunctionParm& pm,
|
||||
bool& isNull,
|
||||
@ -273,15 +272,16 @@ inline bool getBoolForIn(rowgroup::Row& row,
|
||||
case execplan::CalpontSystemCatalog::TEXT:
|
||||
{
|
||||
const string& val = pm[0]->data()->getStrVal(row, isNull);
|
||||
|
||||
if (isNull)
|
||||
return false;
|
||||
|
||||
CHARSET_INFO* cs = pm[0]->data()->resultType().getCharset();
|
||||
|
||||
for (uint32_t i = 1; i < pm.size(); i++)
|
||||
{
|
||||
isNull = false;
|
||||
|
||||
if ( utf8::idb_strcoll(val.c_str(), pm[i]->data()->getStrVal(row, isNull).c_str()) == 0 && !isNull)
|
||||
const string& str1 = pm[i]->data()->getStrVal(row, isNull);
|
||||
if (cs->strnncoll(val.c_str(), val.length(), str1.c_str(), str1.length()) == 0 && !isNull)
|
||||
return true;
|
||||
|
||||
if (isNull && isNotIn)
|
||||
|
@ -20,6 +20,10 @@
|
||||
*
|
||||
*
|
||||
****************************************************************************/
|
||||
#include <mariadb.h>
|
||||
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||
#include <my_sys.h>
|
||||
#include <m_ctype.h>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
@ -42,37 +46,50 @@ CalpontSystemCatalog::ColType Func_instr::operationType( FunctionParm& fp, Calpo
|
||||
return ct;
|
||||
}
|
||||
|
||||
size_t Func_instr::in_str(const string& str, const string& substr, size_t start)
|
||||
{
|
||||
// convert both inputs to wide character strings
|
||||
std::wstring wcstr = utf8::utf8_to_wstring(str);
|
||||
std::wstring wcsubstr = utf8::utf8_to_wstring(substr);
|
||||
|
||||
if ((str.length() && !wcstr.length()) ||
|
||||
(substr.length() && !wcsubstr.length()))
|
||||
// this means one or both of the strings had conversion errors to wide character
|
||||
return 0;
|
||||
|
||||
size_t pos = wcstr.find(wcsubstr, start - 1);
|
||||
return (pos != string::npos ? pos + 1 : 0);
|
||||
}
|
||||
|
||||
int64_t Func_instr::getIntVal(rowgroup::Row& row,
|
||||
FunctionParm& parm,
|
||||
bool& isNull,
|
||||
CalpontSystemCatalog::ColType&)
|
||||
CalpontSystemCatalog::ColType& colType)
|
||||
{
|
||||
uint64_t start = 1;
|
||||
|
||||
if (parm.size() == 3)
|
||||
start = parm[2]->data()->getIntVal(row, isNull);
|
||||
|
||||
if (isNull || start == 0)
|
||||
int64_t start = 0;
|
||||
int64_t start0= 0;
|
||||
my_match_t match;
|
||||
|
||||
const std::string& str = parm[0]->data()->getStrVal(row, isNull);
|
||||
if (isNull)
|
||||
return 0;
|
||||
const char* s1 = str.c_str();
|
||||
uint32_t l1 = (uint32_t)str.length();
|
||||
|
||||
const std::string& substr =parm[1]->data()->getStrVal(row, isNull);
|
||||
if (isNull)
|
||||
return 0;
|
||||
|
||||
//Bug 5110 : to support utf8 char type, we have to convert and search
|
||||
return in_str(parm[0]->data()->getStrVal(row, isNull), parm[1]->data()->getStrVal(row, isNull), start);
|
||||
const char* s2 = substr.c_str();
|
||||
uint32_t l2 = (uint32_t)substr.length();
|
||||
if (l2 < 1)
|
||||
return start + 1;
|
||||
|
||||
CHARSET_INFO* cs = colType.getCharset();
|
||||
|
||||
if (parm.size() == 3)
|
||||
{
|
||||
start0 = start = parm[2]->data()->getIntVal(row, isNull) - 1;
|
||||
|
||||
if ((start < 0) || (start > l1))
|
||||
return 0;
|
||||
|
||||
start = (int64_t)cs->charpos(s1, s1+l1, start); // adjust start for multi-byte
|
||||
|
||||
if (start + l2 > l1) // Substring is longer than str at pos.
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!cs->instr(s1+start, l1-start,
|
||||
s2, l2,
|
||||
&match, 1))
|
||||
return 0;
|
||||
return (int64_t)match.mb_len + start0 + 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -20,6 +20,10 @@
|
||||
*
|
||||
*
|
||||
****************************************************************************/
|
||||
#include <mariadb.h>
|
||||
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||
#include <my_sys.h>
|
||||
#include <m_ctype.h>
|
||||
|
||||
#include <string>
|
||||
using namespace std;
|
||||
@ -56,31 +60,22 @@ CalpontSystemCatalog::ColType Func_lcase::operationType(FunctionParm& fp, Calpon
|
||||
std::string Func_lcase::getStrVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType&)
|
||||
execplan::CalpontSystemCatalog::ColType& colType)
|
||||
{
|
||||
// string str = fp[0]->data()->getStrVal(row, isNull);
|
||||
|
||||
// transform (str.begin(), str.end(), str.begin(), to_lower());
|
||||
|
||||
const string& tstr = fp[0]->data()->getStrVal(row, isNull);
|
||||
|
||||
if (isNull)
|
||||
return "";
|
||||
|
||||
size_t strwclen = utf8::idb_mbstowcs(0, tstr.c_str(), 0) + 1;
|
||||
wchar_t* wcbuf = new wchar_t[strwclen];
|
||||
strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen);
|
||||
wstring wstr(wcbuf, strwclen);
|
||||
CHARSET_INFO* cs = colType.getCharset();
|
||||
uint64_t inLen = tstr.length();
|
||||
uint64_t bufLen= inLen * cs->casedn_multiply;
|
||||
char* outBuf = new char[bufLen];
|
||||
|
||||
uint64_t outLen = cs->casedn(tstr.c_str(), inLen, outBuf, bufLen);
|
||||
|
||||
for (uint32_t i = 0; i < strwclen; i++)
|
||||
wstr[i] = std::towlower(wstr[i]);
|
||||
|
||||
size_t strmblen = utf8::idb_wcstombs(0, wstr.c_str(), 0) + 1;
|
||||
char* outbuf = new char[strmblen];
|
||||
strmblen = utf8::idb_wcstombs(outbuf, wstr.c_str(), strmblen);
|
||||
std::string ret(outbuf, strmblen);
|
||||
delete [] outbuf;
|
||||
delete [] wcbuf;
|
||||
string ret = string(outBuf, outLen);
|
||||
delete [] outBuf;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,10 @@
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include <mariadb.h>
|
||||
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||
#include <my_sys.h>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
@ -127,17 +131,16 @@ std::string Func_least::getStrVal(rowgroup::Row& row,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct)
|
||||
{
|
||||
string leastStr = fp[0]->data()->getStrVal(row, isNull);
|
||||
CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset();
|
||||
|
||||
for (uint32_t i = 1; i < fp.size(); i++)
|
||||
{
|
||||
const string& str1 = fp[i]->data()->getStrVal(row, isNull);
|
||||
|
||||
int tmp = utf8::idb_strcoll(leastStr.c_str(), str1.c_str());
|
||||
|
||||
if ( tmp > 0 )
|
||||
|
||||
// if ( leastStr > str1 )
|
||||
if (cs->strnncoll(leastStr.c_str(), leastStr.length(), str1.c_str(), str1.length()) > 0)
|
||||
{
|
||||
leastStr = str1;
|
||||
}
|
||||
}
|
||||
|
||||
return leastStr;
|
||||
|
@ -22,6 +22,11 @@
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include <mariadb.h>
|
||||
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||
#undef LONGLONG_MIN
|
||||
#include <my_sys.h>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
@ -363,6 +368,7 @@ string Func_nullif::getStrVal(rowgroup::Row& row,
|
||||
CalpontSystemCatalog::ColType& op_ct)
|
||||
{
|
||||
string exp1 = parm[0]->data()->getStrVal(row, isNull);
|
||||
CHARSET_INFO* cs = parm[0]->data()->resultType().getCharset();
|
||||
|
||||
if (isNull)
|
||||
{
|
||||
@ -395,7 +401,7 @@ string Func_nullif::getStrVal(rowgroup::Row& row,
|
||||
exp2 = exp2 + " 00:00:00";
|
||||
}
|
||||
|
||||
if ( utf8::idb_strcoll(exp1.c_str(), exp2.c_str()) == 0 )
|
||||
if (cs->strnncoll(exp1.c_str(), exp1.length(), exp2.c_str(), exp2.length()) == 0)
|
||||
{
|
||||
isNull = true;
|
||||
return "";
|
||||
|
@ -21,6 +21,10 @@
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include <mariadb.h>
|
||||
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||
#include <my_sys.h>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
@ -39,6 +43,10 @@ using namespace joblist;
|
||||
#include "utils_utf8.h"
|
||||
using namespace funcexp;
|
||||
|
||||
// Because including my_sys.h in a Columnstore header causes too many conflicts
|
||||
struct charset_info_st;
|
||||
typedef const struct charset_info_st CHARSET_INFO;
|
||||
|
||||
class to_lower
|
||||
{
|
||||
public:
|
||||
@ -64,10 +72,11 @@ int64_t Func_strcmp::getIntVal(rowgroup::Row& row,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct)
|
||||
{
|
||||
CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset();
|
||||
const string& str = fp[0]->data()->getStrVal(row, isNull);
|
||||
|
||||
const string& str1 = fp[1]->data()->getStrVal(row, isNull);
|
||||
int ret = utf8::idb_strcoll(str.c_str(), str1.c_str());
|
||||
|
||||
int ret = cs->strnncoll(str.c_str(), str.length(), str1.c_str(), str1.length());
|
||||
// mysql's strcmp returns only -1, 0, and 1
|
||||
return (ret < 0 ? -1 : (ret > 0 ? 1 : 0));
|
||||
}
|
||||
|
@ -20,6 +20,10 @@
|
||||
*
|
||||
*
|
||||
****************************************************************************/
|
||||
#include <mariadb.h>
|
||||
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||
#include <my_sys.h>
|
||||
#include <m_ctype.h>
|
||||
|
||||
#include <string>
|
||||
using namespace std;
|
||||
@ -55,31 +59,22 @@ CalpontSystemCatalog::ColType Func_ucase::operationType(FunctionParm& fp, Calpon
|
||||
std::string Func_ucase::getStrVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType&)
|
||||
execplan::CalpontSystemCatalog::ColType& colType)
|
||||
{
|
||||
// string str = fp[0]->data()->getStrVal(row, isNull);
|
||||
|
||||
// transform (str.begin(), str.end(), str.begin(), to_lower());
|
||||
|
||||
const string& tstr = fp[0]->data()->getStrVal(row, isNull);
|
||||
|
||||
if (isNull)
|
||||
return "";
|
||||
|
||||
size_t strwclen = utf8::idb_mbstowcs(0, tstr.c_str(), 0) + 1;
|
||||
wchar_t* wcbuf = new wchar_t[strwclen];
|
||||
strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen);
|
||||
wstring wstr(wcbuf, strwclen);
|
||||
CHARSET_INFO* cs = colType.getCharset();
|
||||
uint64_t inLen = tstr.length();
|
||||
uint64_t bufLen= inLen * cs->caseup_multiply;
|
||||
char* outBuf = new char[bufLen];
|
||||
|
||||
uint64_t outLen = cs->caseup(tstr.c_str(), inLen, outBuf, bufLen);
|
||||
|
||||
for (uint32_t i = 0; i < strwclen; i++)
|
||||
wstr[i] = std::towupper(wstr[i]);
|
||||
|
||||
size_t strmblen = utf8::idb_wcstombs(0, wstr.c_str(), 0) + 1;
|
||||
char* outbuf = new char[strmblen];
|
||||
strmblen = utf8::idb_wcstombs(outbuf, wstr.c_str(), strmblen);
|
||||
std::string ret(outbuf, strmblen);
|
||||
delete [] outbuf;
|
||||
delete [] wcbuf;
|
||||
string ret = string(outBuf, outLen);
|
||||
delete [] outBuf;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -84,8 +84,6 @@ public:
|
||||
|
||||
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& resultType);
|
||||
|
||||
size_t in_str(const std::string& str, const std::string& substr, size_t start);
|
||||
|
||||
int64_t getIntVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
|
@ -1,303 +0,0 @@
|
||||
/* Copyright (C) 2014 InfiniDB, Inc.
|
||||
* Copyright (C) 2016 MariaDB Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; version 2 of
|
||||
the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
// $Id$
|
||||
|
||||
|
||||
#ifndef _UTILS_UTF8_H_
|
||||
#define _UTILS_UTF8_H_
|
||||
|
||||
|
||||
|
||||
#include <string>
|
||||
#if defined(_MSC_VER)
|
||||
#include <malloc.h>
|
||||
#include <windows.h>
|
||||
#elif defined(__FreeBSD__)
|
||||
//#include <cstdlib>
|
||||
#else
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
#include <cstdlib>
|
||||
|
||||
#include <clocale>
|
||||
#include "liboamcpp.h"
|
||||
|
||||
/** @file */
|
||||
|
||||
namespace funcexp
|
||||
{
|
||||
namespace utf8
|
||||
{
|
||||
extern bool JPcodePoint; // code point ordering (Japanese UTF) flag, used in idb_strcoll
|
||||
|
||||
const int MAX_UTF8_BYTES_PER_CHAR = 4;
|
||||
|
||||
// A global loc object so we don't construct one at every compare
|
||||
extern std::locale loc;
|
||||
// Is there a way to construct a global reference to a facet?
|
||||
// const std::collate<char>& coll = std::use_facet<std::collate<char> >(loc);
|
||||
|
||||
//Infinidb version of strlocale BUG 5362
|
||||
//set System Locale "C" by default
|
||||
//return the system Locale currently set in Columnstore.xml
|
||||
inline
|
||||
std::string idb_setlocale()
|
||||
{
|
||||
// get and set locale language
|
||||
std::string systemLang("C");
|
||||
oam::Oam oam;
|
||||
static bool loggedMsg = false;
|
||||
|
||||
try
|
||||
{
|
||||
oam.getSystemConfig("SystemLang", systemLang);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
systemLang = "C";
|
||||
}
|
||||
|
||||
char* pLoc = setlocale(LC_ALL, systemLang.c_str());
|
||||
|
||||
if (pLoc == NULL)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (!loggedMsg)
|
||||
{
|
||||
//send alarm
|
||||
alarmmanager::ALARMManager alarmMgr;
|
||||
std::string alarmItem = "system";
|
||||
alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::INVALID_LOCALE, alarmmanager::SET);
|
||||
|
||||
// Log one line
|
||||
logging::LoggingID lid(17); // ProcessManager -- probably the only one to find this for now
|
||||
logging::MessageLog ml(lid);
|
||||
logging::Message msg(1);
|
||||
logging::Message::Args args;
|
||||
args.add("Failed to set locale ");
|
||||
args.add(systemLang.c_str());
|
||||
args.add(": Setting to 'C'. Critical alarm generated");
|
||||
msg.format( args );
|
||||
ml.logErrorMessage(msg);
|
||||
|
||||
loggedMsg = true;
|
||||
}
|
||||
systemLang = "C";
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
// Ignoring for time being.
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
//send alarm
|
||||
alarmmanager::ALARMManager alarmMgr;
|
||||
std::string alarmItem = "system";
|
||||
alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::INVALID_LOCALE, alarmmanager::CLEAR);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
// Ignoring for time being.
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
printf ("Locale is : %s\n", systemLang.c_str() );
|
||||
|
||||
//BUG 2991
|
||||
setlocale(LC_NUMERIC, "C");
|
||||
|
||||
if (systemLang.find("ja_JP") != std::string::npos)
|
||||
JPcodePoint = true;
|
||||
|
||||
// MCOL-1559 Save off the locale to save runtime cpus
|
||||
std::locale localloc(systemLang.c_str());
|
||||
loc = localloc;
|
||||
|
||||
return systemLang;
|
||||
}
|
||||
|
||||
// Infinidb version of strcoll. BUG 5362
|
||||
// strcoll() comparison while ja_JP.utf8 does not give correct results.
|
||||
// For correct results strcmp() can be used.
|
||||
inline
|
||||
int idb_strcoll(const char* str1, const char* str2)
|
||||
{
|
||||
if (JPcodePoint)
|
||||
return strcmp(str1, str2);
|
||||
else
|
||||
return strcoll(str1, str2);
|
||||
}
|
||||
|
||||
// MCOL-1559 Add a trimmed version of strcoll
|
||||
// The intent here is to make no copy of the original strings and
|
||||
// not modify them, so we can't use trim to deal with the spaces.
|
||||
inline
|
||||
int idb_strtrimcoll(const std::string& str1, const std::string& str2)
|
||||
{
|
||||
static const std::string whitespaces (" ");
|
||||
const char* s1 = str1.c_str();
|
||||
const char* s2 = str2.c_str();
|
||||
|
||||
// Set found1 to the last non-whitespace char in str1
|
||||
std::size_t found1 = str1.find_last_not_of(whitespaces);
|
||||
// Set found2 to the first whitespace char in str2
|
||||
std::size_t found2 = str2.find_last_not_of(whitespaces);
|
||||
|
||||
// Are both strings empty or all whitespace?
|
||||
if (found1 == std::string::npos && found2 == std::string::npos)
|
||||
{
|
||||
return 0; // they match
|
||||
}
|
||||
// If str1 is empty or all spaces
|
||||
if (found1 == std::string::npos)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
// If str2 is empty or all spaces
|
||||
if (found2 == std::string::npos)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
// found1 and found2 point to the character that is not a space.
|
||||
// compare wants it to point to one past.
|
||||
found1 += 1;
|
||||
found2 += 1;
|
||||
// If no trimming needs doing, then strcoll is faster
|
||||
if (found1 == str1.size() && found2 == str2.size())
|
||||
{
|
||||
return idb_strcoll(s1, s2);
|
||||
}
|
||||
// Compare the (trimmed) strings
|
||||
const std::collate<char>& coll = std::use_facet<std::collate<char> >(loc);
|
||||
int rtn = coll.compare(s1, s1+found1, s2, s2+found2);
|
||||
return rtn;
|
||||
}
|
||||
|
||||
// BUG 5241
|
||||
// Infinidb specific mbstowcs(). This will handle both windows and unix platforms
|
||||
// Params dest and max should have enough length to accommodate NULL
|
||||
// Platform wrapper around multibyte -> wide character conversion.
// dest/max describe the output buffer, src is the NUL-terminated input.
// Returns the number of wide characters produced, not counting the
// terminator, or (size_t)-1 on failure.
inline
size_t idb_mbstowcs(wchar_t* dest, const char* src, size_t max)
{
#ifndef _MSC_VER
    return mbstowcs(dest, src, max);
#else
    // Source length -1 converts up to and including the terminating NUL;
    // a max of 0 makes the call return the required buffer size instead.
    size_t converted = MultiByteToWideChar(CP_UTF8, 0, src, -1, dest, (int)max);
    // Drop the terminator from the count; a failure result wraps to -1.
    converted -= 1;
    return converted;
#endif
}
|
||||
|
||||
// BUG 5241
|
||||
// Infinidb specific wcstombs(). This will handle both windows and unix platforms
|
||||
// Params dest and max should have enough length to accommodate NULL
|
||||
// Platform wrapper around wide character -> multibyte conversion.
// dest/max describe the output buffer, src is the NUL-terminated wide input.
// Returns the number of bytes produced, not counting the terminator, or
// (size_t)-1 on failure.
inline
size_t idb_wcstombs(char* dest, const wchar_t* src, size_t max)
{
#ifndef _MSC_VER
    return wcstombs(dest, src, max);
#else
    // Source length -1 converts up to and including the terminating NUL;
    // a max of 0 makes the call return the required buffer size instead.
    size_t converted = WideCharToMultiByte( CP_UTF8, 0, src, -1, dest, (int)max, NULL, NULL);
    // Drop the terminator from the count; a failure result wraps to -1.
    converted -= 1;
    return converted;
#endif
}
|
||||
|
||||
// convert UTF-8 string to wstring
|
||||
inline
|
||||
std::wstring utf8_to_wstring (const std::string& str)
|
||||
{
|
||||
size_t bufsize = str.length() + 1;
|
||||
|
||||
// Convert to wide characters. Do all further work in wide characters
|
||||
wchar_t* wcbuf = new wchar_t[bufsize];
|
||||
// Passing +1 so that windows is happy to see extra position to place NULL
|
||||
size_t strwclen = idb_mbstowcs(wcbuf, str.c_str(), str.length() + 1);
|
||||
|
||||
// if result is -1 it means bad characters which may happen if locale is wrong.
|
||||
// return an empty string
|
||||
if ( strwclen == static_cast<size_t>(-1) )
|
||||
strwclen = 0;
|
||||
|
||||
std::wstring ret(wcbuf, strwclen);
|
||||
|
||||
delete [] wcbuf;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
// convert wstring to UTF-8 string
|
||||
inline
|
||||
std::string wstring_to_utf8 (const std::wstring& str)
|
||||
{
|
||||
char* outbuf = new char[(str.length() * MAX_UTF8_BYTES_PER_CHAR) + 1];
|
||||
// Passing +1 so that windows is happy to see extra position to place NULL
|
||||
size_t strmblen = idb_wcstombs(outbuf, str.c_str(), str.length() * MAX_UTF8_BYTES_PER_CHAR + 1);
|
||||
|
||||
// if result is -1 it means bad characters which may happen if locale is wrong.
|
||||
// return an empty string
|
||||
if ( strmblen == static_cast<size_t>(-1) )
|
||||
strmblen = 0;
|
||||
|
||||
std::string ret(outbuf, strmblen);
|
||||
|
||||
delete [] outbuf;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Given a UTF-8 buffer of `length` bytes, report how many trailing bytes
// belong to a multi-byte sequence that was cut off at the end of the buffer.
// Returns 0 when the buffer ends in a single-byte character or a sequence that
// does not look truncated, otherwise 1, 2 or 3 (the number of bytes to drop).
// Buffers shorter than three bytes are examined too; previously they always
// yielded 0, even when they ended in an incomplete sequence.
inline
uint8_t utf8_truncate_point(const char* input, size_t length)
{
    if (!input || length == 0)
    {
        return 0;
    }

    const unsigned char* end = (const unsigned char*)(input) + length;
    const unsigned char last = end[-1];

    // High bit clear: the buffer ends in a single-byte character.
    if ((last & 0x80) == 0)
    {
        return 0;
    }

    // Last byte starts a new multi-byte sequence that has no room to continue.
    if (last & 0x40)
    {
        return 1;
    }

    // Last byte is a continuation byte: look back for a lead byte that needs
    // more continuation bytes than the buffer holds.

    // Lead byte of a 3 (or 4) byte sequence followed by one continuation byte.
    if (length >= 2 && (end[-2] & 0xe0) == 0xe0)
    {
        return 2;
    }

    // Lead byte of a 4 byte sequence; only three of its bytes are present.
    if (length >= 3 && (end[-3] & 0xf0) == 0xf0)
    {
        return 3;
    }

    return 0;
}
|
||||
|
||||
} //namespace utf8
|
||||
} //namespace funcexp
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user