You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-10-30 07:25:34 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			133 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			133 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* Copyright (C) 2014 InfiniDB, Inc.
 | |
|  * Copyright (C) 2016 MariaDB Corporation.
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or
 | |
|    modify it under the terms of the GNU General Public License
 | |
|    as published by the Free Software Foundation; version 2 of
 | |
|    the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | |
|    MA 02110-1301, USA. */
 | |
| 
 | |
| //  $Id$
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
| #include <string>
 | |
| #if   defined(__FreeBSD__)
 | |
| //#include <cstdlib>
 | |
| #else
 | |
| #include <alloca.h>
 | |
| #endif
 | |
| #include <cstdlib>
 | |
| 
 | |
| #include <clocale>
 | |
| #include "liboamcpp.h"
 | |
| 
 | |
| // TODO: Rename this namespace (currently "utf8"), and also the file, to something resembling "char helper".
 | |
| namespace utf8
 | |
| {
 | |
// Largest number of bytes a single character is assumed to occupy once it is
// encoded as UTF-8. wstring_to_utf8() multiplies the wide-character count by
// this value to size its output buffer.
const int MAX_UTF8_BYTES_PER_CHAR = 4;
 | |
| 
 | |
// BUG 5241
// Project-wide entry point for multibyte -> wide-character conversion.
// The call is forwarded unchanged to the C library's mbstowcs(), so the
// conversion follows the current C locale.
//
// dest: output buffer; it must have room for the converted characters plus
//       the terminating NULL.
// src:  NULL-terminated multibyte string to convert.
// max:  maximum number of wide characters to store in dest; it should count
//       the slot for the terminating NULL as well.
//
// Returns the value produced by mbstowcs(): the number of wide characters
// written (terminator not counted), or static_cast<size_t>(-1) when src
// contains a sequence that is invalid in the current locale.
inline size_t idb_mbstowcs(wchar_t* dest, const char* src, size_t max)
{
  const size_t converted = mbstowcs(dest, src, max);
  return converted;
}
 | |
| 
 | |
// BUG 5241
// Project-wide entry point for wide-character -> multibyte conversion.
// The call is forwarded unchanged to the C library's wcstombs(), so the
// conversion follows the current C locale.
//
// dest: output buffer; it must have room for the converted bytes plus the
//       terminating NULL.
// src:  NULL-terminated wide-character string to convert.
// max:  maximum number of bytes to store in dest; it should count the slot
//       for the terminating NULL as well.
//
// Returns the value produced by wcstombs(): the number of bytes written
// (terminator not counted), or static_cast<size_t>(-1) when src contains a
// character that cannot be represented in the current locale.
inline size_t idb_wcstombs(char* dest, const wchar_t* src, size_t max)
{
  const size_t converted = wcstombs(dest, src, max);
  return converted;
}
 | |
| 
 | |
| // convert UTF-8 string to wstring
 | |
| inline std::wstring utf8_to_wstring(const std::string& str)
 | |
| {
 | |
|   size_t bufsize = str.length() + 1;
 | |
| 
 | |
|   // Convert to wide characters. Do all further work in wide characters
 | |
|   wchar_t* wcbuf = new wchar_t[bufsize];
 | |
|   // Passing +1 so that windows is happy to see extra position to place NULL
 | |
|   size_t strwclen = idb_mbstowcs(wcbuf, str.c_str(), str.length() + 1);
 | |
| 
 | |
|   // if result is -1 it means bad characters which may happen if locale is wrong.
 | |
|   // return an empty string
 | |
|   if (strwclen == static_cast<size_t>(-1))
 | |
|     strwclen = 0;
 | |
| 
 | |
|   std::wstring ret(wcbuf, strwclen);
 | |
| 
 | |
|   delete[] wcbuf;
 | |
|   return ret;
 | |
| }
 | |
| 
 | |
| // convert wstring to UTF-8 string
 | |
| inline std::string wstring_to_utf8(const std::wstring& str)
 | |
| {
 | |
|   char* outbuf = new char[(str.length() * MAX_UTF8_BYTES_PER_CHAR) + 1];
 | |
|   // Passing +1 so that windows is happy to see extra position to place NULL
 | |
|   size_t strmblen = idb_wcstombs(outbuf, str.c_str(), str.length() * MAX_UTF8_BYTES_PER_CHAR + 1);
 | |
| 
 | |
|   // if result is -1 it means bad characters which may happen if locale is wrong.
 | |
|   // return an empty string
 | |
|   if (strmblen == static_cast<size_t>(-1))
 | |
|     strmblen = 0;
 | |
| 
 | |
|   std::string ret(outbuf, strmblen);
 | |
| 
 | |
|   delete[] outbuf;
 | |
|   return ret;
 | |
| }
 | |
| 
 | |
// Determine how many trailing bytes must be dropped from a UTF-8 buffer so
// that it does not end in the middle of a multi-byte character.
//
// input:  start of the buffer (may be NULL only together with length == 0;
//         a NULL input always yields 0).
// length: number of bytes in the buffer.
//
// Returns the number of bytes to truncate from the end (0..3):
//   1 - the last byte starts a new multi-byte sequence,
//   2 - the last two bytes are the start of a 3- or 4-byte sequence,
//   3 - the last three bytes are the start of a 4-byte sequence,
//   0 - the buffer ends on a character boundary (or is empty).
//
// Only the last three bytes are inspected. Buffers shorter than three bytes
// are handled too: each look-back is bounds-checked instead of giving up.
// For length >= 3 the result is the same as it has always been.
inline uint8_t utf8_truncate_point(const char* input, size_t length)
{
  if (!input || length == 0)
  {
    return 0;
  }

  const unsigned char* end = reinterpret_cast<const unsigned char*>(input) + length;
  const unsigned char last = end[-1];

  // Plain ASCII byte: nothing is cut in half.
  if ((last & 0x80) == 0)
  {
    return 0;
  }

  // First byte in a new multi-byte sequence (11xxxxxx) with nothing after it.
  if (last & 0x40)
  {
    return 1;
  }

  // From here on the last byte is a continuation byte (10xxxxxx).

  // 3 byte (or longer) sequence that has only its first continuation byte.
  if (length >= 2 && (end[-2] & 0xe0) == 0xe0)
  {
    return 2;
  }

  // 4 byte sequence that is missing its final byte.
  if (length >= 3 && (end[-3] & 0xf0) == 0xf0)
  {
    return 3;
  }

  return 0;
}
 | |
| 
 | |
// mcs_strcoll overloads. Only the declarations live in this header; the
// definitions are provided elsewhere.
// NOTE(review): presumably these compare str1 with str2 under the collation
// identified by charsetNumber and return <0 / 0 / >0 like strcoll() - confirm
// against the implementation.

// NULL-terminated C strings.
int mcs_strcoll(const char* str1, const char* str2, const uint32_t charsetNumber);

// C strings with explicit lengths l1 and l2.
int mcs_strcoll(const char* str1, const uint32_t l1,
                const char* str2, const uint32_t l2,
                const uint32_t charsetNumber);

// std::string operands passed by pointer.
int mcs_strcoll(const std::string* str1, const std::string* str2, const uint32_t charsetNumber);

// std::string operands passed by reference.
int mcs_strcoll(const std::string& str1, const std::string& str2, const uint32_t charsetNumber);
 | |
| 
 | |
// mcs_strcollsp overloads. Only the declarations live in this header; the
// definitions are provided elsewhere.
// NOTE(review): the "sp" suffix presumably denotes a space-padding-aware
// variant of mcs_strcoll - confirm against the implementation.

// NULL-terminated C strings.
int mcs_strcollsp(const char* str1, const char* str2, const uint32_t charsetNumber);

// C strings with explicit lengths l1 and l2.
int mcs_strcollsp(const char* str1, uint32_t l1,
                  const char* str2, const uint32_t l2,
                  const uint32_t charsetNumber);

// std::string operands passed by pointer.
int mcs_strcollsp(const std::string* str1, const std::string* str2, const uint32_t charsetNumber);

// std::string operands passed by reference.
int mcs_strcollsp(const std::string& str1, const std::string& str2, const uint32_t charsetNumber);
 | |
| }  // namespace utf8
 |