You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-27 16:01:57 +03:00
304 lines
8.4 KiB
C++
304 lines
8.4 KiB
C++
/* Copyright (C) 2014 InfiniDB, Inc.
|
|
* Copyright (C) 2016 MariaDB Corporation.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
// $Id$
|
|
|
|
|
|
#ifndef _UTILS_UTF8_H_
|
|
#define _UTILS_UTF8_H_
|
|
|
|
|
|
|
|
#include <string>
|
|
#if defined(_MSC_VER)
|
|
#include <malloc.h>
|
|
#include <windows.h>
|
|
#elif defined(__FreeBSD__)
|
|
//#include <cstdlib>
|
|
#else
|
|
#include <alloca.h>
|
|
#endif
|
|
#include <cstdlib>
|
|
|
|
#include <clocale>
|
|
#include "liboamcpp.h"
|
|
|
|
/** @file */
|
|
|
|
namespace funcexp
|
|
{
|
|
namespace utf8
|
|
{
|
|
extern bool JPcodePoint; // code point ordering (Japanese UTF) flag, used in idb_strcoll
|
|
|
|
const int MAX_UTF8_BYTES_PER_CHAR = 4;
|
|
|
|
// A global loc object so we don't construct one at every compare
|
|
extern std::locale loc;
|
|
// Is there a way to construct a global reference to a facet?
|
|
// const std::collate<char>& coll = std::use_facet<std::collate<char> >(loc);
|
|
|
|
//Infinidb version of strlocale BUG 5362
|
|
//set System Locale "C" by default
|
|
//return the system Locale currently set in from Columnstore.xml
|
|
inline
|
|
std::string idb_setlocale()
|
|
{
|
|
// get and set locale language
|
|
std::string systemLang("C");
|
|
oam::Oam oam;
|
|
static bool loggedMsg = false;
|
|
|
|
try
|
|
{
|
|
oam.getSystemConfig("SystemLang", systemLang);
|
|
}
|
|
catch (...)
|
|
{
|
|
systemLang = "C";
|
|
}
|
|
|
|
char* pLoc = setlocale(LC_ALL, systemLang.c_str());
|
|
|
|
if (pLoc == NULL)
|
|
{
|
|
try
|
|
{
|
|
if (!loggedMsg)
|
|
{
|
|
//send alarm
|
|
alarmmanager::ALARMManager alarmMgr;
|
|
std::string alarmItem = "system";
|
|
alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::INVALID_LOCALE, alarmmanager::SET);
|
|
|
|
// Log one line
|
|
logging::LoggingID lid(17); // ProcessManager -- probably the only one to find this for now
|
|
logging::MessageLog ml(lid);
|
|
logging::Message msg(1);
|
|
logging::Message::Args args;
|
|
args.add("Failed to set locale ");
|
|
args.add(systemLang.c_str());
|
|
args.add(": Setting to 'C'. Critical alarm generated");
|
|
msg.format( args );
|
|
ml.logErrorMessage(msg);
|
|
|
|
loggedMsg = true;
|
|
}
|
|
systemLang = "C";
|
|
}
|
|
catch (...)
|
|
{
|
|
// Ignoring for time being.
|
|
}
|
|
}
|
|
else
|
|
{
|
|
try
|
|
{
|
|
//send alarm
|
|
alarmmanager::ALARMManager alarmMgr;
|
|
std::string alarmItem = "system";
|
|
alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::INVALID_LOCALE, alarmmanager::CLEAR);
|
|
}
|
|
catch (...)
|
|
{
|
|
// Ignoring for time being.
|
|
}
|
|
|
|
}
|
|
|
|
printf ("Locale is : %s\n", systemLang.c_str() );
|
|
|
|
//BUG 2991
|
|
setlocale(LC_NUMERIC, "C");
|
|
|
|
if (systemLang.find("ja_JP") != std::string::npos)
|
|
JPcodePoint = true;
|
|
|
|
// MCOL-1559 Save off the locale to save runtime cpus
|
|
std::locale localloc(systemLang.c_str());
|
|
loc = localloc;
|
|
|
|
return systemLang;
|
|
}
|
|
|
|
// Infinidb version of strcoll. BUG 5362
|
|
// strcoll() comparison while ja_JP.utf8 does not give correct results.
|
|
// For correct results strcmp() can be used.
|
|
inline
|
|
int idb_strcoll(const char* str1, const char* str2)
|
|
{
|
|
if (JPcodePoint)
|
|
return strcmp(str1, str2);
|
|
else
|
|
return strcoll(str1, str2);
|
|
}
|
|
|
|
// MCOL-1559 Add a trimmed version of strcoll
|
|
// The intent here is to make no copy of the original strings and
|
|
// not modify them, so we can't use trim to deal with the spaces.
|
|
inline
|
|
int idb_strtrimcoll(const std::string& str1, const std::string& str2)
|
|
{
|
|
static const std::string whitespaces (" ");
|
|
const char* s1 = str1.c_str();
|
|
const char* s2 = str2.c_str();
|
|
|
|
// Set found1 to the last non-whitespace char in str1
|
|
std::size_t found1 = str1.find_last_not_of(whitespaces);
|
|
// Set found2 to the first whitespace char in str2
|
|
std::size_t found2 = str2.find_last_not_of(whitespaces);
|
|
|
|
// Are both strings empty or all whitespace?
|
|
if (found1 == std::string::npos && found2 == std::string::npos)
|
|
{
|
|
return 0; // they match
|
|
}
|
|
// If str1 is empty or all spaces
|
|
if (found1 == std::string::npos)
|
|
{
|
|
return -1;
|
|
}
|
|
// If str2 is empty or all spaces
|
|
if (found2 == std::string::npos)
|
|
{
|
|
return 1;
|
|
}
|
|
|
|
// found1 and found2 point to the character that is not a space.
|
|
// compare wants it to point to one past.
|
|
found1 += 1;
|
|
found2 += 1;
|
|
// If no trimming needs doing, then strcoll is faster
|
|
if (found1 == str1.size() && found2 == str2.size())
|
|
{
|
|
return idb_strcoll(s1, s2);
|
|
}
|
|
// Compare the (trimmed) strings
|
|
const std::collate<char>& coll = std::use_facet<std::collate<char> >(loc);
|
|
int rtn = coll.compare(s1, s1+found1, s2, s2+found2);
|
|
return rtn;
|
|
}
|
|
|
|
// BUG 5241
// InfiniDB specific mbstowcs() that works on both Windows and Unix platforms.
// dest must have room for max wide characters, and max should include space
// for the terminating NUL.
// Returns the number of wide characters produced, not counting the
// terminator, or static_cast<size_t>(-1) on failure.
inline
size_t idb_mbstowcs(wchar_t* dest, const char* src, size_t max)
{
#if defined(_MSC_VER)
    // A source length of -1 converts up to and including the NUL terminator;
    // an output size of 0 makes the call return the required buffer size.
    const size_t countWithNul = MultiByteToWideChar(CP_UTF8, 0, src, -1, dest, static_cast<int>(max));

    // Drop the terminator from the count; a failure (0) wraps around to -1.
    return countWithNul - 1;
#else
    const size_t converted = mbstowcs(dest, src, max);
    return converted;
#endif
}
|
|
|
|
// BUG 5241
// InfiniDB specific wcstombs() that works on both Windows and Unix platforms.
// dest must have room for max bytes, and max should include space for the
// terminating NUL.
// Returns the number of bytes produced, not counting the terminator, or
// static_cast<size_t>(-1) on failure.
inline
size_t idb_wcstombs(char* dest, const wchar_t* src, size_t max)
{
#if defined(_MSC_VER)
    // A source length of -1 converts up to and including the NUL terminator;
    // an output size of 0 makes the call return the required buffer size.
    const size_t countWithNul = WideCharToMultiByte(CP_UTF8, 0, src, -1, dest, static_cast<int>(max), NULL, NULL);

    // Drop the terminator from the count; a failure (0) wraps around to -1.
    return countWithNul - 1;
#else
    const size_t converted = wcstombs(dest, src, max);
    return converted;
#endif
}
|
|
|
|
// convert UTF-8 string to wstring
|
|
inline
|
|
std::wstring utf8_to_wstring (const std::string& str)
|
|
{
|
|
size_t bufsize = str.length() + 1;
|
|
|
|
// Convert to wide characters. Do all further work in wide characters
|
|
wchar_t* wcbuf = new wchar_t[bufsize];
|
|
// Passing +1 so that windows is happy to see extra position to place NULL
|
|
size_t strwclen = idb_mbstowcs(wcbuf, str.c_str(), str.length() + 1);
|
|
|
|
// if result is -1 it means bad characters which may happen if locale is wrong.
|
|
// return an empty string
|
|
if ( strwclen == static_cast<size_t>(-1) )
|
|
strwclen = 0;
|
|
|
|
std::wstring ret(wcbuf, strwclen);
|
|
|
|
delete [] wcbuf;
|
|
return ret;
|
|
}
|
|
|
|
|
|
// convert wstring to UTF-8 string
|
|
inline
|
|
std::string wstring_to_utf8 (const std::wstring& str)
|
|
{
|
|
char* outbuf = new char[(str.length() * MAX_UTF8_BYTES_PER_CHAR) + 1];
|
|
// Passing +1 so that windows is happy to see extra position to place NULL
|
|
size_t strmblen = idb_wcstombs(outbuf, str.c_str(), str.length() * MAX_UTF8_BYTES_PER_CHAR + 1);
|
|
|
|
// if result is -1 it means bad characters which may happen if locale is wrong.
|
|
// return an empty string
|
|
if ( strmblen == static_cast<size_t>(-1) )
|
|
strmblen = 0;
|
|
|
|
std::string ret(outbuf, strmblen);
|
|
|
|
delete [] outbuf;
|
|
return ret;
|
|
}
|
|
|
|
// Inspect the last (up to three) bytes of a UTF-8 buffer and return how many
// trailing bytes belong to a multibyte character that was cut off at the end
// of the buffer, i.e. how many bytes to drop so the buffer ends on a
// character boundary.
//
// input   pointer to the first byte of the buffer
// length  number of bytes in the buffer
//
// Returns 0 when the buffer is empty, ends in a single-byte character, or
// no truncated sequence is recognised; otherwise 1, 2 or 3.
inline
uint8_t utf8_truncate_point(const char* input, size_t length)
{
    if (length == 0)
    {
        return 0;
    }

    const unsigned char* end = reinterpret_cast<const unsigned char*>(input) + length;
    const unsigned char last = end[-1];

    // Single-byte (ASCII) character at the end: nothing to trim.
    if (!(last & 0x80))
    {
        return 0;
    }

    // First byte of a new multi-byte sequence with nothing after it
    if (last & 0x40)
    {
        return 1;
    }

    // The last byte is a continuation byte. Each look-back is bounds-checked
    // so buffers shorter than three bytes are handled too.

    // Lead byte of a 3 (or 4) byte sequence followed by only one continuation byte
    if (length >= 2 && (end[-2] & 0xe0) == 0xe0)
    {
        return 2;
    }

    // Lead byte of a 4 byte sequence followed by only two continuation bytes
    if (length >= 3 && (end[-3] & 0xf0) == 0xf0)
    {
        return 3;
    }

    return 0;
}
|
|
|
|
} //namespace utf8
|
|
} //namespace funcexp
|
|
|
|
#endif
|