MCOL-3536 collation

2025-07-30 19:23:07 +03:00 · 2020-05-26 12:42:11 -05:00
parent 11ba12f6ea
commit 06e50e0926
47 changed files with 516 additions and 535 deletions
--- a/utils/funcexp/func_case.cpp
+++ b/utils/funcexp/func_case.cpp
@@ -22,6 +22,10 @@
 *
 ****************************************************************************/

+#include <mariadb.h>
+#undef set_bits  // mariadb.h defines set_bits, which is incompatible with boost
+#include <my_sys.h>
+
 #include <string>
 //#define NDEBUG
 #include <cassert>
@@ -180,20 +184,21 @@ inline uint64_t simple_case_cmp(Row& row,
        case execplan::CalpontSystemCatalog::VARCHAR:
        {
            const string& ev = parm[n]->data()->getStrVal(row, isNull);
-
            if (isNull)
                break;
+            CHARSET_INFO* cs = parm[n]->data()->resultType().getCharset();

            for (i = 1; i <= whereCount; i++)
            {
                //BUG 5362
-                if (utf8::idb_strcoll(ev.c_str(), parm[i]->data()->getStrVal(row, isNull).c_str()) == 0 && !isNull)
+                const string& p1 = parm[i]->data()->getStrVal(row, isNull);
+                if (isNull)
+                    break;
+                if (cs->strnncoll(ev.c_str(), ev.length(), p1.c_str(), p1.length()) == 0)
                {
                    foundIt = true;
                    break;
                }
-                else
-                    isNull = false;
            }

            break;
--- a/utils/funcexp/func_char_length.cpp
+++ b/utils/funcexp/func_char_length.cpp
@@ -49,7 +49,7 @@ namespace funcexp

 CalpontSystemCatalog::ColType Func_char_length::operationType( FunctionParm& fp, CalpontSystemCatalog::ColType& resultType )
 {
-    return resultType;
+    return fp[0]->data()->resultType();
 }

 int64_t Func_char_length::getIntVal(rowgroup::Row& row,
@@ -86,8 +86,7 @@ int64_t Func_char_length::getIntVal(rowgroup::Row& row,
                return 0;
            const char* b = tstr.c_str();
            const char* e = tstr.c_str() + tstr.length();
-            const CHARSET_INFO* cs = get_charset(parm[0]->data()->resultType().charsetNumber, MYF(MY_WME));
-            return (int64_t)cs->numchars(b, e);
+            return (int64_t)parm[0]->data()->resultType().getCharset()->numchars(b, e);
        }

        case execplan::CalpontSystemCatalog::DATE:
--- a/utils/funcexp/func_greatest.cpp
+++ b/utils/funcexp/func_greatest.cpp
@@ -22,6 +22,10 @@
 *
 ****************************************************************************/

+#include <mariadb.h>
+#undef set_bits  // mariadb.h defines set_bits, which is incompatible with boost
+#include <my_sys.h>
+
 #include <cstdlib>
 #include <string>
 #include <sstream>
@@ -148,6 +152,7 @@ std::string Func_greatest::getStrVal(rowgroup::Row& row,
                                     execplan::CalpontSystemCatalog::ColType& op_ct)
 {
    const string& str = fp[0]->data()->getStrVal(row, isNull);
+    CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset();

    string greatestStr = str;

@@ -155,12 +160,10 @@ std::string Func_greatest::getStrVal(rowgroup::Row& row,
    {
        const string& str1 = fp[i]->data()->getStrVal(row, isNull);

-        int tmp = utf8::idb_strcoll(greatestStr.c_str(), str1.c_str());
-
-        if ( tmp < 0 )
-
-//		if ( greatestStr < str1 )
+        if (cs->strnncoll(greatestStr.c_str(), greatestStr.length(), str1.c_str(), str1.length()) < 0)
+        {
            greatestStr = str1;
+        }
    }

    return greatestStr;
--- a/utils/funcexp/func_in.cpp
+++ b/utils/funcexp/func_in.cpp
@@ -22,6 +22,10 @@
 *
 ****************************************************************************/

+#include <mariadb.h>
+#undef set_bits  // mariadb.h defines set_bits, which is incompatible with boost
+#include <my_sys.h>
+
 #include <cstdlib>
 #include <string>
 using namespace std;
@@ -52,11 +56,6 @@ inline bool numericEQ(result_t op1, result_t op2)
    return op1 == op2;
 }

-inline bool strEQ(string op1, string op2)
-{
-    return utf8::idb_strcoll(op1.c_str(), op2.c_str()) == 0;
-}
-
 inline bool getBoolForIn(rowgroup::Row& row,
                         funcexp::FunctionParm& pm,
                         bool& isNull,
@@ -273,15 +272,16 @@ inline bool getBoolForIn(rowgroup::Row& row,
        case execplan::CalpontSystemCatalog::TEXT:
        {
            const string& val = pm[0]->data()->getStrVal(row, isNull);
-
            if (isNull)
                return false;

+            CHARSET_INFO* cs = pm[0]->data()->resultType().getCharset();
+
            for (uint32_t i = 1; i < pm.size(); i++)
            {
                isNull = false;
-
-                if ( utf8::idb_strcoll(val.c_str(), pm[i]->data()->getStrVal(row, isNull).c_str()) == 0 && !isNull)
+                const string& str1 = pm[i]->data()->getStrVal(row, isNull);
+                if (cs->strnncoll(val.c_str(), val.length(), str1.c_str(), str1.length()) == 0 && !isNull)
                    return true;

                if (isNull && isNotIn)
--- a/utils/funcexp/func_instr.cpp
+++ b/utils/funcexp/func_instr.cpp
@@ -20,6 +20,10 @@
 *
 *
 ****************************************************************************/
+#include <mariadb.h>
+#undef set_bits  // mariadb.h defines set_bits, which is incompatible with boost
+#include <my_sys.h>
+#include <m_ctype.h>

 #include <cstdlib>
 #include <string>
@@ -42,37 +46,50 @@ CalpontSystemCatalog::ColType Func_instr::operationType( FunctionParm& fp, Calpo
    return ct;
 }

-size_t Func_instr::in_str(const string& str, const string& substr, size_t start)
-{
-    // convert both inputs to wide character strings
-    std::wstring wcstr = utf8::utf8_to_wstring(str);
-    std::wstring wcsubstr = utf8::utf8_to_wstring(substr);
-
-    if ((str.length() && !wcstr.length()) ||
-            (substr.length() && !wcsubstr.length()))
-        // this means one or both of the strings had conversion errors to wide character
-        return 0;
-
-    size_t pos = wcstr.find(wcsubstr, start - 1);
-    return (pos != string::npos ? pos + 1 : 0);
-}
-
 int64_t Func_instr::getIntVal(rowgroup::Row& row,
                              FunctionParm& parm,
                              bool& isNull,
-                              CalpontSystemCatalog::ColType&)
+                              CalpontSystemCatalog::ColType& colType)
 {
-    uint64_t start = 1;
-
-    if (parm.size() == 3)
-        start = parm[2]->data()->getIntVal(row, isNull);
-
-    if (isNull || start == 0)
+    int64_t start = 0;
+    int64_t start0= 0;
+    my_match_t match;
+    
+    const std::string& str = parm[0]->data()->getStrVal(row, isNull);
+    if (isNull)
+        return 0;
+    const char* s1 = str.c_str();
+    uint32_t l1 = (uint32_t)str.length();
+    
+    const std::string& substr =parm[1]->data()->getStrVal(row, isNull);
+    if (isNull)
        return 0;

-    //Bug 5110 : to support utf8 char type, we have to convert and search
-    return in_str(parm[0]->data()->getStrVal(row, isNull), parm[1]->data()->getStrVal(row, isNull), start);
+    const char* s2 = substr.c_str();
+    uint32_t l2 = (uint32_t)substr.length();
+    if (l2 < 1)
+        return start + 1;

+    CHARSET_INFO* cs = colType.getCharset();
+    
+    if (parm.size() == 3)
+    {
+        start0 = start = parm[2]->data()->getIntVal(row, isNull) - 1;
+        
+        if ((start < 0) || (start > l1))
+          return 0;
+        
+        start = (int64_t)cs->charpos(s1, s1+l1, start); // adjust start for multi-byte
+
+        if (start + l2 > l1) // Substring is longer than str at pos.
+            return 0;
+    }
+    
+    if (!cs->instr(s1+start, l1-start,
+                   s2, l2,
+                   &match, 1))
+        return 0;
+    return (int64_t)match.mb_len + start0 + 1;
 }


--- a/utils/funcexp/func_lcase.cpp
+++ b/utils/funcexp/func_lcase.cpp
@@ -20,6 +20,10 @@
 *
 *
 ****************************************************************************/
+#include <mariadb.h>
+#undef set_bits  // mariadb.h defines set_bits, which is incompatible with boost
+#include <my_sys.h>
+#include <m_ctype.h>

 #include <string>
 using namespace std;
@@ -56,31 +60,22 @@ CalpontSystemCatalog::ColType Func_lcase::operationType(FunctionParm& fp, Calpon
 std::string Func_lcase::getStrVal(rowgroup::Row& row,
                                  FunctionParm& fp,
                                  bool& isNull,
-                                  execplan::CalpontSystemCatalog::ColType&)
+                                  execplan::CalpontSystemCatalog::ColType& colType)
 {
-//	string str = fp[0]->data()->getStrVal(row, isNull);
-
-//	transform (str.begin(), str.end(), str.begin(), to_lower());
-
    const string& tstr = fp[0]->data()->getStrVal(row, isNull);

    if (isNull)
        return "";

-    size_t strwclen = utf8::idb_mbstowcs(0, tstr.c_str(), 0) + 1;
-    wchar_t* wcbuf = new wchar_t[strwclen];
-    strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen);
-    wstring wstr(wcbuf, strwclen);
+    CHARSET_INFO* cs = colType.getCharset();
+    uint64_t inLen = tstr.length();
+    uint64_t bufLen= inLen * cs->casedn_multiply;
+    char* outBuf = new char[bufLen];
+    
+    uint64_t outLen = cs->casedn(tstr.c_str(), inLen, outBuf, bufLen);

-    for (uint32_t i = 0; i < strwclen; i++)
-        wstr[i] = std::towlower(wstr[i]);
-
-    size_t strmblen = utf8::idb_wcstombs(0, wstr.c_str(), 0) + 1;
-    char* outbuf = new char[strmblen];
-    strmblen = utf8::idb_wcstombs(outbuf, wstr.c_str(), strmblen);
-    std::string ret(outbuf, strmblen);
-    delete [] outbuf;
-    delete [] wcbuf;
+    string ret = string(outBuf, outLen);
+    delete [] outBuf;
    return ret;
 }

--- a/utils/funcexp/func_least.cpp
+++ b/utils/funcexp/func_least.cpp
@@ -22,6 +22,10 @@
 *
 ****************************************************************************/

+#include <mariadb.h>
+#undef set_bits  // mariadb.h defines set_bits, which is incompatible with boost
+#include <my_sys.h>
+
 #include <cstdlib>
 #include <string>
 #include <sstream>
@@ -127,17 +131,16 @@ std::string Func_least::getStrVal(rowgroup::Row& row,
                                  execplan::CalpontSystemCatalog::ColType& op_ct)
 {
    string leastStr = fp[0]->data()->getStrVal(row, isNull);
+    CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset();

    for (uint32_t i = 1; i < fp.size(); i++)
    {
        const string& str1 = fp[i]->data()->getStrVal(row, isNull);

-        int tmp = utf8::idb_strcoll(leastStr.c_str(), str1.c_str());
-
-        if ( tmp > 0 )
-
-//		if ( leastStr > str1 )
+        if (cs->strnncoll(leastStr.c_str(), leastStr.length(), str1.c_str(), str1.length()) > 0)
+        {
            leastStr = str1;
+        }
    }

    return leastStr;
--- a/utils/funcexp/func_nullif.cpp
+++ b/utils/funcexp/func_nullif.cpp
@@ -22,6 +22,11 @@
 *
 ****************************************************************************/

+#include <mariadb.h>
+#undef set_bits  // mariadb.h defines set_bits, which is incompatible with boost
+#undef LONGLONG_MIN
+#include <my_sys.h>
+
 #include <cstdlib>
 #include <string>
 #include <sstream>
@@ -363,6 +368,7 @@ string Func_nullif::getStrVal(rowgroup::Row& row,
                              CalpontSystemCatalog::ColType& op_ct)
 {
    string exp1 = parm[0]->data()->getStrVal(row, isNull);
+    CHARSET_INFO* cs = parm[0]->data()->resultType().getCharset();

    if (isNull)
    {
@@ -395,7 +401,7 @@ string Func_nullif::getStrVal(rowgroup::Row& row,
        exp2 = exp2 + " 00:00:00";
    }

-    if ( utf8::idb_strcoll(exp1.c_str(), exp2.c_str()) == 0 )
+    if (cs->strnncoll(exp1.c_str(), exp1.length(), exp2.c_str(), exp2.length()) == 0)
    {
        isNull = true;
        return "";
--- a/utils/funcexp/func_strcmp.cpp
+++ b/utils/funcexp/func_strcmp.cpp
@@ -21,6 +21,10 @@
 *
 ****************************************************************************/

+#include <mariadb.h>
+#undef set_bits  // mariadb.h defines set_bits, which is incompatible with boost
+#include <my_sys.h>
+
 #include <cstdlib>
 #include <string>
 #include <sstream>
@@ -39,6 +43,10 @@ using namespace joblist;
 #include "utils_utf8.h"
 using namespace funcexp;

+// Because including my_sys.h in a Columnstore header causes too many conflicts
+struct charset_info_st;
+typedef const struct charset_info_st CHARSET_INFO;
+
 class to_lower
 {
 public:
@@ -64,10 +72,11 @@ int64_t Func_strcmp::getIntVal(rowgroup::Row& row,
                               bool& isNull,
                               execplan::CalpontSystemCatalog::ColType& op_ct)
 {
+    CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset();
    const string& str = fp[0]->data()->getStrVal(row, isNull);
-
    const string& str1 = fp[1]->data()->getStrVal(row, isNull);
-    int ret = utf8::idb_strcoll(str.c_str(), str1.c_str());
+
+    int ret = cs->strnncoll(str.c_str(), str.length(), str1.c_str(), str1.length());
    // mysql's strcmp returns only -1, 0, and 1
    return (ret < 0 ? -1 : (ret > 0 ? 1 : 0));
 }
--- a/utils/funcexp/func_ucase.cpp
+++ b/utils/funcexp/func_ucase.cpp
@@ -20,6 +20,10 @@
 *
 *
 ****************************************************************************/
+#include <mariadb.h>
+#undef set_bits  // mariadb.h defines set_bits, which is incompatible with boost
+#include <my_sys.h>
+#include <m_ctype.h>

 #include <string>
 using namespace std;
@@ -55,31 +59,22 @@ CalpontSystemCatalog::ColType Func_ucase::operationType(FunctionParm& fp, Calpon
 std::string Func_ucase::getStrVal(rowgroup::Row& row,
                                  FunctionParm& fp,
                                  bool& isNull,
-                                  execplan::CalpontSystemCatalog::ColType&)
+                                  execplan::CalpontSystemCatalog::ColType& colType)
 {
-//	string str = fp[0]->data()->getStrVal(row, isNull);
-
-//	transform (str.begin(), str.end(), str.begin(), to_lower());
-
    const string& tstr = fp[0]->data()->getStrVal(row, isNull);

    if (isNull)
        return "";

-    size_t strwclen = utf8::idb_mbstowcs(0, tstr.c_str(), 0) + 1;
-    wchar_t* wcbuf = new wchar_t[strwclen];
-    strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen);
-    wstring wstr(wcbuf, strwclen);
+    CHARSET_INFO* cs = colType.getCharset();
+    uint64_t inLen = tstr.length();
+    uint64_t bufLen= inLen * cs->caseup_multiply;
+    char* outBuf = new char[bufLen];
+    
+    uint64_t outLen = cs->caseup(tstr.c_str(), inLen, outBuf, bufLen);

-    for (uint32_t i = 0; i < strwclen; i++)
-        wstr[i] = std::towupper(wstr[i]);
-
-    size_t strmblen = utf8::idb_wcstombs(0, wstr.c_str(), 0) + 1;
-    char* outbuf = new char[strmblen];
-    strmblen = utf8::idb_wcstombs(outbuf, wstr.c_str(), strmblen);
-    std::string ret(outbuf, strmblen);
-    delete [] outbuf;
-    delete [] wcbuf;
+    string ret = string(outBuf, outLen);
+    delete [] outBuf;
    return ret;
 }

--- a/utils/funcexp/functor_int.h
+++ b/utils/funcexp/functor_int.h
@@ -84,8 +84,6 @@ public:

    execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& resultType);

-    size_t in_str(const std::string& str, const std::string& substr, size_t start);
-
    int64_t getIntVal(rowgroup::Row& row,
                      FunctionParm& fp,
                      bool& isNull,
--- a/utils/funcexp/utils_utf8.h
+++ b/utils/funcexp/utils_utf8.h
@@ -1,303 +0,0 @@
-/* Copyright (C) 2014 InfiniDB, Inc.
- * Copyright (C) 2016 MariaDB Corporation.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License
-   as published by the Free Software Foundation; version 2 of
-   the License.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
-   MA 02110-1301, USA. */
-
-//  $Id$
-
-
-#ifndef _UTILS_UTF8_H_
-#define _UTILS_UTF8_H_
-
-
-
-#include <string>
-#if defined(_MSC_VER)
-#include <malloc.h>
-#include <windows.h>
-#elif defined(__FreeBSD__)
-//#include <cstdlib>
-#else
-#include <alloca.h>
-#endif
-#include <cstdlib>
-
-#include <clocale>
-#include "liboamcpp.h"
-
-/** @file */
-
-namespace funcexp
-{
-namespace utf8
-{
-extern bool JPcodePoint;		// code point ordering (Japanese UTF) flag, used in idb_strcoll
-
-const int MAX_UTF8_BYTES_PER_CHAR = 4;
-
-// A global loc object so we don't construct one at every compare
-extern std::locale loc;
-// Is there a way to construct a global reference to a facet?
-// const std::collate<char>& coll = std::use_facet<std::collate<char> >(loc);
-
-//Infinidb version of strlocale  BUG 5362
-//set System Locale "C" by default
-//return the system Locale currently set in from Columnstore.xml
-inline
-std::string idb_setlocale()
-{
-    // get and set locale language
-    std::string systemLang("C");
-    oam::Oam oam;
-    static bool loggedMsg = false;
-
-    try
-    {
-        oam.getSystemConfig("SystemLang", systemLang);
-    }
-    catch (...)
-    {
-        systemLang = "C";
-    }
-
-    char* pLoc = setlocale(LC_ALL, systemLang.c_str());
-
-    if (pLoc == NULL)
-    {
-        try
-        {
-            if (!loggedMsg)
-            {
-                //send alarm
-                alarmmanager::ALARMManager alarmMgr;
-                std::string alarmItem = "system";
-                alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::INVALID_LOCALE, alarmmanager::SET);
-                
-                // Log one line
-                logging::LoggingID lid(17);  // ProcessManager -- probably the only one to find this for now
-                logging::MessageLog ml(lid);
-                logging::Message msg(1);
-                logging::Message::Args args;
-                args.add("Failed to set locale ");
-                args.add(systemLang.c_str());
-                args.add(": Setting to 'C'. Critical alarm generated");
-                msg.format( args );
-                ml.logErrorMessage(msg);
-                
-                loggedMsg = true;
-            }
-            systemLang = "C";
-        }
-        catch (...)
-        {
-            // Ignoring for time being.
-        }
-    }
-    else
-    {
-        try
-        {
-            //send alarm
-            alarmmanager::ALARMManager alarmMgr;
-            std::string alarmItem = "system";
-            alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::INVALID_LOCALE, alarmmanager::CLEAR);
-        }
-        catch (...)
-        {
-            // Ignoring for time being.
-        }
-
-    }
-
-    printf ("Locale is : %s\n", systemLang.c_str() );
-
-    //BUG 2991
-    setlocale(LC_NUMERIC, "C");
-
-    if (systemLang.find("ja_JP") != std::string::npos)
-        JPcodePoint = true;
-
-    // MCOL-1559 Save off the locale to save runtime cpus
-    std::locale localloc(systemLang.c_str());
-    loc = localloc;
-
-    return systemLang;
-}
-
-// Infinidb version of strcoll.  BUG 5362
-// strcoll() comparison while ja_JP.utf8 does not give correct results.
-// For correct results strcmp() can be used.
-inline
-int idb_strcoll(const char* str1, const char* str2)
-{
-    if (JPcodePoint)
-        return strcmp(str1, str2);
-    else
-        return strcoll(str1, str2);
-}
-
-// MCOL-1559 Add a trimmed version of strcoll
-// The intent here is to make no copy of the original strings and
-// not modify them, so we can't use trim to deal with the spaces.
-inline
-int idb_strtrimcoll(const std::string& str1, const std::string& str2)
-{
-    static const std::string whitespaces (" ");
-    const char* s1 = str1.c_str();
-    const char* s2 = str2.c_str();
-
-    // Set found1 to the last non-whitespace char in str1
-    std::size_t found1 = str1.find_last_not_of(whitespaces);
-    // Set found2 to the first whitespace char in str2
-    std::size_t found2 = str2.find_last_not_of(whitespaces);
-
-     // Are both strings empty or all whitespace?
-    if (found1 == std::string::npos && found2 == std::string::npos)
-    {
-        return 0; // they match
-    }
-    // If str1 is empty or all spaces
-    if (found1 == std::string::npos)
-    {
-        return -1;
-    }
-    // If str2 is empty or all spaces
-    if (found2 == std::string::npos)
-    {
-        return 1;
-    }
-
-    // found1 and found2 point to the character that is not a space. 
-    // compare wants it to point to one past.
-    found1 += 1;
-    found2 += 1;
-    // If no trimming needs doing, then strcoll is faster
-    if (found1 == str1.size() && found2 == str2.size())
-    {
-        return idb_strcoll(s1, s2);
-    }
-    // Compare the (trimmed) strings
-    const std::collate<char>& coll = std::use_facet<std::collate<char> >(loc);
-    int rtn = coll.compare(s1, s1+found1, s2, s2+found2);
-    return rtn;
-}
-
-// BUG 5241
-// Infinidb specific mbstowcs(). This will handle both windows and unix platforms
-// Params dest and max should have enough length to accomodate NULL
-inline
-size_t idb_mbstowcs(wchar_t* dest, const char* src, size_t max)
-{
-#ifdef _MSC_VER
-    // 4th param (-1) denotes to convert till hit NULL char
-    // if 6th param max = 0, will return the required buffer size
-    size_t strwclen = MultiByteToWideChar(CP_UTF8, 0, src, -1, dest, (int)max);
-    // decrement the count of NULL; will become -1 on failure
-    return --strwclen;
-
-#else
-    return mbstowcs(dest, src, max);
-#endif
-}
-
-// BUG 5241
-// Infinidb specific wcstombs(). This will handle both windows and unix platforms
-// Params dest and max should have enough length to accomodate NULL
-inline
-size_t idb_wcstombs(char* dest, const wchar_t* src, size_t max)
-{
-#ifdef _MSC_VER
-    // 4th param (-1) denotes to convert till hit NULL char
-    //if 6th param max = 0, will return the required buffer size
-    size_t strmblen = WideCharToMultiByte( CP_UTF8, 0, src, -1, dest, (int)max, NULL, NULL);
-    // decrement the count of NULL; will become -1 on failure
-    return --strmblen;
-#else
-    return wcstombs(dest, src, max);
-#endif
-}
-
-// convert UTF-8 string to wstring
-inline
-std::wstring utf8_to_wstring (const std::string& str)
-{
-    size_t bufsize = str.length() + 1;
-
-    // Convert to wide characters. Do all further work in wide characters
-    wchar_t* wcbuf = new wchar_t[bufsize];
-    // Passing +1 so that windows is happy to see extra position to place NULL
-    size_t strwclen = idb_mbstowcs(wcbuf, str.c_str(), str.length() + 1);
-
-    // if result is -1 it means bad characters which may happen if locale is wrong.
-    // return an empty string
-    if ( strwclen == static_cast<size_t>(-1) )
-        strwclen = 0;
-
-    std::wstring ret(wcbuf, strwclen);
-
-    delete [] wcbuf;
-    return ret;
-}
-
-
-// convert wstring to UTF-8 string
-inline
-std::string wstring_to_utf8 (const std::wstring& str)
-{
-    char* outbuf = new char[(str.length() * MAX_UTF8_BYTES_PER_CHAR) + 1];
-    // Passing +1 so that windows is happy to see extra position to place NULL
-    size_t strmblen = idb_wcstombs(outbuf, str.c_str(), str.length() * MAX_UTF8_BYTES_PER_CHAR + 1);
-
-    // if result is -1 it means bad characters which may happen if locale is wrong.
-    // return an empty string
-    if ( strmblen == static_cast<size_t>(-1) )
-        strmblen = 0;
-
-    std::string ret(outbuf, strmblen);
-
-    delete [] outbuf;
-    return ret;
-}
-
-inline
-uint8_t utf8_truncate_point(const char* input, size_t length)
-{
-    // Find the beginning of a multibyte char to truncate at and return the
-    // number of bytes to truncate
-    if (length < 3)
-    {
-        return 0;
-    }
-
-    const unsigned char* b = (const unsigned char*)(input) + length - 3;
-
-    if (b[2] & 0x80)
-    {
-        // First byte in a new multi-byte sequence
-        if (b[2] & 0x40) return 1;
-        // 3 byte sequence
-        else if ((b[1] & 0xe0) == 0xe0) return 2;
-        // 4 byte sequence
-        else if ((b[0] & 0xf0) == 0xf0) return 3;
-    }
-
-    return 0;
-}
-
-} //namespace utf8
-} //namespace funcexp
-
-#endif