You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
MCOL-3536 Collation
This commit is contained in:
@ -6672,7 +6672,8 @@ int processLimitAndOffset(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We don't currently support limit with correlated subquery
|
// We don't currently support limit with correlated subquery
|
||||||
if (gwi.subQuery && !gwi.correlatedTbNameVec.empty() && csep->hasOrderBy())
|
if (csep->limitNum() != (uint64_t) - 1 &&
|
||||||
|
gwi.subQuery && !gwi.correlatedTbNameVec.empty())
|
||||||
{
|
{
|
||||||
gwi.fatalParseError = true;
|
gwi.fatalParseError = true;
|
||||||
gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_LIMIT_SUB);
|
gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_LIMIT_SUB);
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -56,37 +59,58 @@ int64_t Func_find_in_set::getIntVal(rowgroup::Row& row,
|
|||||||
CalpontSystemCatalog::ColType& op_ct)
|
CalpontSystemCatalog::ColType& op_ct)
|
||||||
{
|
{
|
||||||
const string& searchStr = parm[0]->data()->getStrVal(row, isNull);
|
const string& searchStr = parm[0]->data()->getStrVal(row, isNull);
|
||||||
|
|
||||||
if (isNull)
|
if (isNull)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
const string& setString = parm[1]->data()->getStrVal(row, isNull);
|
const string& setString = parm[1]->data()->getStrVal(row, isNull);
|
||||||
|
|
||||||
if (isNull)
|
if (isNull)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (searchStr.find(",") != string::npos)
|
if (searchStr.find(",") != string::npos)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
string newSearchStr(searchStr.substr(0, strlen(searchStr.c_str())));
|
if (setString.length() > searchStr.length())
|
||||||
string newSetString(setString.substr(0, strlen(setString.c_str())));
|
return 0;
|
||||||
//tokenize the setStr with comma as seprator.
|
|
||||||
typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
|
CHARSET_INFO *cs= op_ct.getCharset();
|
||||||
boost::char_separator<char> sep( ",");
|
|
||||||
tokenizer tokens(newSetString, sep);
|
|
||||||
|
|
||||||
unsigned i = 0;
|
my_wc_t wc= 0;
|
||||||
size_t pos = 0;
|
const char *str_begin= setString.c_str();
|
||||||
|
const char *str_end= setString.c_str();
|
||||||
for (tokenizer::iterator tok_iter = tokens.begin(); tok_iter != tokens.end(); ++tok_iter)
|
const char *real_end= str_end + setString.length();
|
||||||
|
const char *find_str= searchStr.c_str();
|
||||||
|
uint find_str_len= searchStr.length();
|
||||||
|
int position= 0;
|
||||||
|
static const char separator=',';
|
||||||
|
while (1)
|
||||||
{
|
{
|
||||||
pos = (*tok_iter).find(newSearchStr);
|
int symbol_len;
|
||||||
i++;
|
if ((symbol_len= cs->mb_wc(&wc, (uchar*) str_end,
|
||||||
|
(uchar*) real_end)) > 0)
|
||||||
if (( pos != string::npos) && (newSearchStr.length() == (*tok_iter).length()))
|
{
|
||||||
return i;
|
const char *substr_end= str_end + symbol_len;
|
||||||
|
bool is_last_item= (substr_end == real_end);
|
||||||
|
bool is_separator= (wc == (my_wc_t) separator);
|
||||||
|
if (is_separator || is_last_item)
|
||||||
|
{
|
||||||
|
position++;
|
||||||
|
if (is_last_item && !is_separator)
|
||||||
|
str_end= substr_end;
|
||||||
|
if (!cs->strnncoll(str_begin, (uint) (str_end - str_begin),
|
||||||
|
find_str, find_str_len))
|
||||||
|
return (longlong) position;
|
||||||
|
else
|
||||||
|
str_begin= substr_end;
|
||||||
|
}
|
||||||
|
str_end= substr_end;
|
||||||
|
}
|
||||||
|
else if (str_end - str_begin == 0 &&
|
||||||
|
find_str_len == 0 &&
|
||||||
|
wc == (my_wc_t) separator)
|
||||||
|
return (longlong) ++position;
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,22 +53,22 @@ bool boolVal(SPTP& parm, Row& row, const string& timeZone)
|
|||||||
case CalpontSystemCatalog::TEXT:
|
case CalpontSystemCatalog::TEXT:
|
||||||
case CalpontSystemCatalog::VARCHAR:
|
case CalpontSystemCatalog::VARCHAR:
|
||||||
ret = (atoi((char*)(parm->data()->getStrVal(timeZone).c_str())) != 0);
|
ret = (atoi((char*)(parm->data()->getStrVal(timeZone).c_str())) != 0);
|
||||||
|
break;
|
||||||
case CalpontSystemCatalog::FLOAT:
|
case CalpontSystemCatalog::FLOAT:
|
||||||
case CalpontSystemCatalog::UFLOAT:
|
case CalpontSystemCatalog::UFLOAT:
|
||||||
ret = (parm->data()->getFloatVal(row, isNull) != 0);
|
ret = (parm->data()->getFloatVal(row, isNull) != 0);
|
||||||
|
break;
|
||||||
case CalpontSystemCatalog::DOUBLE:
|
case CalpontSystemCatalog::DOUBLE:
|
||||||
case CalpontSystemCatalog::UDOUBLE:
|
case CalpontSystemCatalog::UDOUBLE:
|
||||||
ret = (parm->data()->getDoubleVal(row, isNull) != 0);
|
ret = (parm->data()->getDoubleVal(row, isNull) != 0);
|
||||||
|
break;
|
||||||
case CalpontSystemCatalog::LONGDOUBLE:
|
case CalpontSystemCatalog::LONGDOUBLE:
|
||||||
ret = (parm->data()->getLongDoubleVal(row, isNull) != 0);
|
ret = (parm->data()->getLongDoubleVal(row, isNull) != 0);
|
||||||
|
break;
|
||||||
case CalpontSystemCatalog::DECIMAL:
|
case CalpontSystemCatalog::DECIMAL:
|
||||||
case CalpontSystemCatalog::UDECIMAL:
|
case CalpontSystemCatalog::UDECIMAL:
|
||||||
ret = (parm->data()->getDecimalVal(row, isNull).value != 0);
|
ret = (parm->data()->getDecimalVal(row, isNull).value != 0);
|
||||||
|
break;
|
||||||
case CalpontSystemCatalog::BIGINT:
|
case CalpontSystemCatalog::BIGINT:
|
||||||
case CalpontSystemCatalog::SMALLINT:
|
case CalpontSystemCatalog::SMALLINT:
|
||||||
case CalpontSystemCatalog::MEDINT:
|
case CalpontSystemCatalog::MEDINT:
|
||||||
@ -83,6 +83,7 @@ bool boolVal(SPTP& parm, Row& row, const string& timeZone)
|
|||||||
case CalpontSystemCatalog::TIME:
|
case CalpontSystemCatalog::TIME:
|
||||||
default:
|
default:
|
||||||
ret = (parm->data()->getIntVal(row, isNull) != 0);
|
ret = (parm->data()->getIntVal(row, isNull) != 0);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -48,69 +51,61 @@ CalpontSystemCatalog::ColType Func_insert::operationType(FunctionParm& fp, Calpo
|
|||||||
return fp[0]->data()->resultType();
|
return fp[0]->data()->resultType();
|
||||||
}
|
}
|
||||||
|
|
||||||
string insertStr(const string& src, int pos, int len, const string& targ)
|
|
||||||
{
|
|
||||||
int64_t strLen = static_cast<int64_t>(src.length());
|
|
||||||
|
|
||||||
if ((pos <= 0) || ((pos - 1) >= strLen))
|
|
||||||
return src;
|
|
||||||
|
|
||||||
if ((len < 0) || (len > strLen))
|
|
||||||
len = strLen;
|
|
||||||
|
|
||||||
const char* srcptr = src.c_str();
|
|
||||||
advance(srcptr, pos - 1, srcptr + strLen);
|
|
||||||
// srcptr now pointing to where we need to insert targ string
|
|
||||||
|
|
||||||
uint32_t srcPos = srcptr - src.c_str();
|
|
||||||
|
|
||||||
uint32_t finPos = strLen;
|
|
||||||
const char* finptr = src.c_str();
|
|
||||||
|
|
||||||
if ((strLen - (pos - 1 + len)) >= 0)
|
|
||||||
{
|
|
||||||
advance(finptr, (pos - 1 + len), finptr + strLen);
|
|
||||||
// finptr now pointing to the end of the string to replace
|
|
||||||
finPos = finptr - src.c_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
string out;
|
|
||||||
out.reserve(srcPos + targ.length() + strLen - finPos + 1);
|
|
||||||
out.append( src.c_str(), srcPos );
|
|
||||||
out.append( targ.c_str(), targ.length() );
|
|
||||||
out.append( src.c_str() + finPos, strLen - finPos );
|
|
||||||
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string Func_insert::getStrVal(rowgroup::Row& row,
|
std::string Func_insert::getStrVal(rowgroup::Row& row,
|
||||||
FunctionParm& fp,
|
FunctionParm& fp,
|
||||||
bool& isNull,
|
bool& isNull,
|
||||||
execplan::CalpontSystemCatalog::ColType&)
|
execplan::CalpontSystemCatalog::ColType&)
|
||||||
{
|
{
|
||||||
string tstr;
|
string src;
|
||||||
string tnewstr;
|
string tnewstr;
|
||||||
stringValue(fp[0], row, isNull, tstr);
|
int64_t start, length;
|
||||||
|
|
||||||
|
stringValue(fp[0], row, isNull, src);
|
||||||
if (isNull)
|
if (isNull)
|
||||||
{
|
|
||||||
return "";
|
return "";
|
||||||
}
|
|
||||||
|
|
||||||
stringValue(fp[3], row, isNull, tnewstr);
|
stringValue(fp[3], row, isNull, tnewstr);
|
||||||
if (isNull)
|
if (isNull)
|
||||||
return "";
|
return "";
|
||||||
|
|
||||||
int64_t pos = fp[1]->data()->getIntVal(row, isNull);
|
start = fp[1]->data()->getIntVal(row, isNull);
|
||||||
|
|
||||||
if (isNull)
|
if (isNull)
|
||||||
return "";
|
return "";
|
||||||
|
|
||||||
int64_t len = fp[2]->data()->getIntVal(row, isNull);
|
length = fp[2]->data()->getIntVal(row, isNull);
|
||||||
|
|
||||||
if (isNull)
|
if (isNull)
|
||||||
return "";
|
return "";
|
||||||
|
|
||||||
return insertStr( tstr, pos, len, tnewstr );
|
start--; // Because SQL syntax is 1 based and we want 0 based.
|
||||||
|
|
||||||
|
CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset();
|
||||||
|
|
||||||
|
// binLen represents the number of bytes
|
||||||
|
int64_t binLen = static_cast<int64_t>(src.length());
|
||||||
|
const char* pos = src.c_str();
|
||||||
|
const char* end = pos + binLen;
|
||||||
|
// strLen is number of characters
|
||||||
|
int64_t strLen = cs->numchars(pos, end);
|
||||||
|
|
||||||
|
// Return the original string if start isn't within the string.
|
||||||
|
if ((start <= 1) || start >= strLen)
|
||||||
|
return src;
|
||||||
|
|
||||||
|
if ((length < 0) || (length > strLen))
|
||||||
|
length = strLen;
|
||||||
|
|
||||||
|
// Convert start and length from characters to bytes.
|
||||||
|
start = cs->charpos(pos, end, start);
|
||||||
|
length = cs->charpos(pos+start, end, length);
|
||||||
|
|
||||||
|
string out;
|
||||||
|
out.reserve(binLen - length + tnewstr.length() + 1);
|
||||||
|
|
||||||
|
out.append(src.c_str(), start);
|
||||||
|
out.append(tnewstr.c_str(), tnewstr.length());
|
||||||
|
out.append(src.c_str() + start + length, binLen - start - length);
|
||||||
|
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
@ -20,6 +20,10 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include "errorids.h"
|
#include "errorids.h"
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
@ -21,6 +21,10 @@
|
|||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
@ -20,6 +20,10 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include "errorids.h"
|
#include "errorids.h"
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include <mariadb.h>
|
#include <mariadb.h>
|
||||||
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
@ -21,6 +21,9 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
#include <mariadb.h>
|
||||||
|
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -47,106 +50,106 @@ CalpontSystemCatalog::ColType Func_trim::operationType(FunctionParm& fp, Calpont
|
|||||||
std::string Func_trim::getStrVal(rowgroup::Row& row,
|
std::string Func_trim::getStrVal(rowgroup::Row& row,
|
||||||
FunctionParm& fp,
|
FunctionParm& fp,
|
||||||
bool& isNull,
|
bool& isNull,
|
||||||
execplan::CalpontSystemCatalog::ColType&)
|
execplan::CalpontSystemCatalog::ColType& type)
|
||||||
{
|
{
|
||||||
// The number of characters (not bytes) in our input tstr.
|
CHARSET_INFO* cs = type.getCharset();
|
||||||
// Not all of these are necessarily significant. We need to search for the
|
|
||||||
// NULL terminator to be sure.
|
|
||||||
size_t strwclen;
|
|
||||||
// this holds the number of characters (not bytes) in ourtrim tstr.
|
|
||||||
size_t trimwclen;
|
|
||||||
|
|
||||||
// The original string
|
// The original string
|
||||||
const string& tstr = fp[0]->data()->getStrVal(row, isNull);
|
const string& src = fp[0]->data()->getStrVal(row, isNull);
|
||||||
|
if (isNull)
|
||||||
|
return "";
|
||||||
|
if (src.empty() || src.length() == 0)
|
||||||
|
return src;
|
||||||
|
// binLen represents the number of bytes in src
|
||||||
|
size_t binLen = src.length();
|
||||||
|
const char* pos = src.c_str();
|
||||||
|
const char* end = pos + binLen;
|
||||||
|
// strLen = the number of characters in src
|
||||||
|
size_t strLen = cs->numchars(pos, end);
|
||||||
|
|
||||||
// The trim characters.
|
// The trim characters.
|
||||||
const string& trim = (fp.size() > 1 ? fp[1]->data()->getStrVal(row, isNull) : " ");
|
const string& trim = (fp.size() > 1 ? fp[1]->data()->getStrVal(row, isNull) : " ");
|
||||||
|
// binTLen represents the number of bytes in trim
|
||||||
|
size_t binTLen = trim.length();
|
||||||
|
const char* posT = trim.c_str();
|
||||||
|
// strTLen = the number of characters in trim
|
||||||
|
size_t strTLen = cs->numchars(posT, posT+binTLen);
|
||||||
|
if (strTLen == 0 || strTLen > strLen)
|
||||||
|
return src;
|
||||||
|
|
||||||
if (isNull)
|
if (binTLen == 1)
|
||||||
return "";
|
|
||||||
|
|
||||||
if (tstr.empty() || tstr.length() == 0)
|
|
||||||
return tstr;
|
|
||||||
|
|
||||||
// Rather than calling the wideconvert functions with a null buffer to
|
|
||||||
// determine the size of buffer to allocate, we can be sure the wide
|
|
||||||
// char string won't be longer than:
|
|
||||||
strwclen = tstr.length(); // a guess to start with. This will be >= to the real count.
|
|
||||||
int bufsize = strwclen + 1;
|
|
||||||
|
|
||||||
// Convert the string to wide characters. Do all further work in wide characters
|
|
||||||
wchar_t* wcbuf = new wchar_t[bufsize];
|
|
||||||
strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen + 1);
|
|
||||||
|
|
||||||
// Bad char in mbc can return -1
|
|
||||||
if (strwclen == static_cast<size_t>(-1))
|
|
||||||
strwclen = 0;
|
|
||||||
|
|
||||||
// Convert the trim string to wide
|
|
||||||
trimwclen = trim.length(); // A guess to start.
|
|
||||||
int trimbufsize = trimwclen + 1;
|
|
||||||
wchar_t* wctrim = new wchar_t[trimbufsize];
|
|
||||||
size_t trimlen = utf8::idb_mbstowcs(wctrim, trim.c_str(), trimwclen + 1);
|
|
||||||
|
|
||||||
// Bad char in mbc can return -1
|
|
||||||
if (trimlen == static_cast<size_t>(-1))
|
|
||||||
trimlen = 0;
|
|
||||||
|
|
||||||
size_t trimCmpLen = trimlen * sizeof(wchar_t);
|
|
||||||
|
|
||||||
const wchar_t* oPtr = wcbuf; // To remember the start of the string
|
|
||||||
const wchar_t* aPtr = oPtr;
|
|
||||||
const wchar_t* aEnd = wcbuf + strwclen - 1;
|
|
||||||
size_t trimCnt = 0;
|
|
||||||
|
|
||||||
if (trimlen > 0)
|
|
||||||
{
|
{
|
||||||
if (trimlen == 1)
|
// If the trim string is 1 byte, don't waste cpu for memcmp
|
||||||
|
// Trim leading
|
||||||
|
while (pos < end && *pos == *posT)
|
||||||
{
|
{
|
||||||
// If trim is a single char, then don't spend the overhead for memcmp.
|
++pos;
|
||||||
wchar_t chr = wctrim[0];
|
--binLen;
|
||||||
|
|
||||||
// remove leading
|
|
||||||
while (aPtr != aEnd && *aPtr == chr)
|
|
||||||
{
|
|
||||||
aPtr++;
|
|
||||||
++trimCnt;
|
|
||||||
}
|
|
||||||
|
|
||||||
// remove trailing
|
|
||||||
while (aEnd != aPtr && *aEnd == chr)
|
|
||||||
{
|
|
||||||
aEnd--;
|
|
||||||
++trimCnt;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
// Trim trailing
|
||||||
|
while (end > pos && *end == *posT)
|
||||||
{
|
{
|
||||||
aEnd -= (trimlen - 1); // So we don't compare past the end of the string.
|
--end;
|
||||||
|
--binLen;
|
||||||
// remove leading
|
}
|
||||||
while (aPtr <= aEnd && !memcmp(aPtr, wctrim, trimCmpLen))
|
}
|
||||||
|
else if (!cs->use_mb())
|
||||||
|
{
|
||||||
|
// This is a one byte per char charset with multiple char trim.
|
||||||
|
// Trim leading
|
||||||
|
while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0)
|
||||||
|
{
|
||||||
|
pos += binTLen;
|
||||||
|
binLen -= binTLen;
|
||||||
|
}
|
||||||
|
// Trim trailing
|
||||||
|
while (end-binTLen >= pos && memcmp(end-binTLen,posT,binTLen) == 0)
|
||||||
|
{
|
||||||
|
end -= binTLen;
|
||||||
|
binLen -= binTLen;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// We're using a multi-byte charset
|
||||||
|
// Trim leading is easy
|
||||||
|
while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0)
|
||||||
|
{
|
||||||
|
pos += binTLen;
|
||||||
|
binLen -= binTLen;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Trim trailing
|
||||||
|
// The problem is that the byte pattern at the end could
|
||||||
|
// match memcmp, but not be correct since the first byte compared
|
||||||
|
// may actually be a second or later byte from a previous char.
|
||||||
|
|
||||||
|
// We start at the beginning of the string and move forward
|
||||||
|
// one character at a time until we reach the end. Then we can
|
||||||
|
// safely compare.
|
||||||
|
while (end - binTLen >= pos)
|
||||||
|
{
|
||||||
|
const char* p = pos;
|
||||||
|
uint32 l;
|
||||||
|
while (p + binTLen < end)
|
||||||
{
|
{
|
||||||
aPtr += trimlen;
|
if ((l = my_ismbchar(cs, p, end))) // returns the number of bytes in the leading char or zero if one byte
|
||||||
trimCnt += trimlen;
|
p += l;
|
||||||
|
else
|
||||||
|
++p;
|
||||||
}
|
}
|
||||||
|
if (p + binTLen == end && memcmp(p,posT,binTLen) == 0)
|
||||||
// remove trailing
|
|
||||||
while (aPtr <= aEnd && !memcmp(aEnd, wctrim, trimCmpLen))
|
|
||||||
{
|
{
|
||||||
aEnd -= trimlen; //BUG 5241
|
end -= binTLen;
|
||||||
trimCnt += trimlen;
|
binLen -= binTLen;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
break; // We've run out of places to look
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bug 5110 - error in allocating enough memory for utf8 chars
|
|
||||||
size_t aLen = strwclen - trimCnt;
|
|
||||||
wstring trimmed = wstring(aPtr, aLen);
|
|
||||||
// Turn back to a string
|
// Turn back to a string
|
||||||
std::string ret(utf8::wstring_to_utf8(trimmed.c_str()));
|
std::string ret(pos, binLen);
|
||||||
delete [] wctrim;
|
|
||||||
delete [] wcbuf;
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user