mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-04-26 11:48:52 +03:00
133 lines
4.2 KiB
C++
133 lines
4.2 KiB
C++
/* Copyright (C) 2014 InfiniDB, Inc.
|
|
* Copyright (C) 2016 MariaDB Corporation.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
// $Id$
|
|
|
|
#pragma once
|
|
|
|
#include <string>
|
|
#if defined(__FreeBSD__)
|
|
//#include <cstdlib>
|
|
#else
|
|
#include <alloca.h>
|
|
#endif
|
|
#include <cstdlib>
|
|
|
|
#include <clocale>
|
|
#include "liboamcpp.h"
|
|
|
|
// TODO: rename this namespace (currently "utf8"), and ideally the file as well, to something resembling a char helper.
|
|
namespace utf8
{
/// Number of bytes reserved per wide character when sizing multibyte output
/// buffers (the longest UTF-8 encoding of one code point is 4 bytes).
const int MAX_UTF8_BYTES_PER_CHAR = 4;

// BUG 5241
/// Project wrapper around mbstowcs(), kept as the single call site for
/// multibyte -> wide conversion.
///
/// @param dest destination buffer; must have room for @p max wide characters,
///             including the terminating null
/// @param src  null-terminated multibyte string, decoded according to the
///             current C locale
/// @param max  maximum number of wide characters to write to @p dest
/// @return number of wide characters written (terminator excluded), or
///         static_cast<size_t>(-1) when @p src holds an invalid sequence
inline size_t idb_mbstowcs(wchar_t* dest, const char* src, size_t max)
{
  return std::mbstowcs(dest, src, max);
}

// BUG 5241
/// Project wrapper around wcstombs(), kept as the single call site for
/// wide -> multibyte conversion.
///
/// @param dest destination buffer; must have room for @p max bytes, including
///             the terminating null
/// @param src  null-terminated wide string, encoded according to the current
///             C locale
/// @param max  maximum number of bytes to write to @p dest
/// @return number of bytes written (terminator excluded), or
///         static_cast<size_t>(-1) when a character cannot be represented
inline size_t idb_wcstombs(char* dest, const wchar_t* src, size_t max)
{
  return std::wcstombs(dest, src, max);
}

/// Convert a multibyte (expected: UTF-8) string to a wide string.
///
/// The conversion goes through idb_mbstowcs() and therefore follows the
/// current C locale; the input is only decoded as UTF-8 when a UTF-8 locale is
/// active. Conversion stops at the first embedded null byte.
///
/// @param str multibyte input
/// @return the converted wide string, or an empty string when the input holds
///         bytes that are invalid for the current locale
inline std::wstring utf8_to_wstring(const std::string& str)
{
  // Every wide character consumes at least one input byte, so length() is the
  // worst case; the extra slot leaves room for the terminating null.
  // Converting straight into the result's own storage means there is no
  // separately owned buffer that could leak if an allocation throws.
  std::wstring wide(str.length() + 1, L'\0');

  size_t converted = idb_mbstowcs(&wide[0], str.c_str(), wide.size());

  // (size_t)-1 signals undecodable input, which may happen if the locale is
  // wrong: report that as an empty string.
  if (converted == static_cast<size_t>(-1))
    converted = 0;

  wide.resize(converted);
  return wide;
}

/// Convert a wide string to a multibyte (expected: UTF-8) string.
///
/// The conversion goes through idb_wcstombs() and therefore follows the
/// current C locale; the output is only UTF-8 when a UTF-8 locale is active.
/// Conversion stops at the first embedded null character.
///
/// @param str wide input
/// @return the converted multibyte string, or an empty string when a character
///         cannot be represented in the current locale
inline std::string wstring_to_utf8(const std::wstring& str)
{
  // Reserve MAX_UTF8_BYTES_PER_CHAR bytes per character plus one byte for the
  // terminating null. Converting straight into the result's own storage means
  // there is no separately owned buffer that could leak if an allocation throws.
  std::string narrow(str.length() * MAX_UTF8_BYTES_PER_CHAR + 1, '\0');

  size_t written = idb_wcstombs(&narrow[0], str.c_str(), narrow.size());

  // (size_t)-1 signals an unrepresentable character, which may happen if the
  // locale is wrong: report that as an empty string.
  if (written == static_cast<size_t>(-1))
    written = 0;

  narrow.resize(written);
  return narrow;
}

/// Work out how many trailing bytes must be dropped so that a UTF-8 buffer cut
/// off at @p length does not end in the middle of a multi-byte character.
///
/// Only the last three bytes are inspected; the rest of the buffer is not
/// validated.
///
/// @param input  start of the buffer; may be null, in which case 0 is returned
/// @param length number of bytes of @p input that are kept
/// @return 0 when the buffer ends on a character boundary, otherwise the
///         number of trailing bytes (1..3, never more than @p length) that
///         belong to an incomplete character
inline uint8_t utf8_truncate_point(const char* input, size_t length)
{
  if (input == nullptr || length == 0)
    return 0;

  const unsigned char* end = reinterpret_cast<const unsigned char*>(input) + length;
  const unsigned char last = end[-1];

  // Plain ASCII byte: already on a character boundary.
  if ((last & 0x80) == 0)
    return 0;

  // Lead byte (11xxxxxx) of a new multi-byte sequence with nothing after it.
  if (last & 0x40)
    return 1;

  // From here on `last` is a continuation byte (10xxxxxx).

  // Lead byte of a 3- or 4-byte sequence followed by a single continuation.
  if (length >= 2 && (end[-2] & 0xe0) == 0xe0)
    return 2;

  // Lead byte of a 4-byte sequence with only two of its three continuations.
  if (length >= 3 && (end[-3] & 0xf0) == 0xf0)
    return 3;

  return 0;
}

// Declarations only: the definitions live outside this header.
// NOTE(review): the names suggest strcoll()-style comparison under the
// collation identified by charsetNumber - confirm against the implementation.
int mcs_strcoll(const char* str1, const char* str2, const uint32_t charsetNumber);
int mcs_strcoll(const char* str1, const uint32_t l1, const char* str2, const uint32_t l2,
                const uint32_t charsetNumber);
int mcs_strcoll(const std::string* str1, const std::string* str2, const uint32_t charsetNumber);
int mcs_strcoll(const std::string& str1, const std::string& str2, const uint32_t charsetNumber);

// NOTE(review): the "sp" suffix presumably marks a space-padding-aware variant
// of mcs_strcoll - confirm against the implementation.
int mcs_strcollsp(const char* str1, const char* str2, const uint32_t charsetNumber);
int mcs_strcollsp(const char* str1, uint32_t l1, const char* str2, const uint32_t l2,
                  const uint32_t charsetNumber);
int mcs_strcollsp(const std::string* str1, const std::string* str2, const uint32_t charsetNumber);
int mcs_strcollsp(const std::string& str1, const std::string& str2, const uint32_t charsetNumber);
}  // namespace utf8
|