1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-01 06:46:55 +03:00

MCOL-444 Truncate UTF8 correctly

cpimport would truncate UTF-8 data partway through a multi-byte character, which
would cause problems for functions using that data. This patch
calculates the correct truncation point when inserting the data.
This commit is contained in:
Andrew Hutchings
2017-11-29 10:43:57 +00:00
parent 9b65a86ce2
commit 3d5bd3809c
3 changed files with 30 additions and 2 deletions

View File

@ -41,6 +41,8 @@
#include "joblisttypes.h"
#include "utils_utf8.h"
using namespace std;
using namespace boost;
using namespace execplan;
@ -513,7 +515,8 @@ void BulkLoadBuffer::convert(char *field, int fieldLength,
// on disk (e.g. 5 for a varchar(5) instead of 8).
if (fieldLength > column.definedWidth)
{
memcpy( charTmpBuf, field, column.definedWidth );
uint8_t truncate_point = funcexp::utf8::utf8_truncate_point(field, column.definedWidth);
memcpy( charTmpBuf, field, column.definedWidth - truncate_point );
bufStats.satCount++;
}
else

View File

@ -47,6 +47,7 @@ using namespace BRM;
#include "IDBPolicy.h"
#include "cacheutils.h"
using namespace idbdatafile;
#include "utils_utf8.h"
namespace
{
@ -731,7 +732,8 @@ int Dctnry::insertDctnry(const char* buf,
// @Bug 2565: Truncate any strings longer than schema's column width
if (curSig.size > m_colWidth)
{
curSig.size = m_colWidth;
uint8_t truncate_point = funcexp::utf8::utf8_truncate_point((const char*)curSig.signature, m_colWidth);
curSig.size = m_colWidth - truncate_point;
++truncCount;
}