From e3cd20538887c037faea1f93cdda32763f7fc7fb Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Tue, 11 Jun 2019 10:37:04 +0100 Subject: [PATCH] MCOL-1968 Fix UTF char/varchar min/max handling If the first byte of a char/varchar was > 0x80 then it will break the min/max values for an extent during cpimport. This patch makes the min/max compare unsigned and only switches to signed when storing. In addition send all the LDI / INSERT...SELECT data to cpimport, not truncated. Let cpimport figure out the truncation point. --- dbcon/mysql/ha_calpont_dml.cpp | 18 ++++++++++-------- writeengine/bulk/we_bulkloadbuffer.cpp | 11 +++++++---- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/dbcon/mysql/ha_calpont_dml.cpp b/dbcon/mysql/ha_calpont_dml.cpp index c2dc36e47..a9404c223 100755 --- a/dbcon/mysql/ha_calpont_dml.cpp +++ b/dbcon/mysql/ha_calpont_dml.cpp @@ -860,7 +860,13 @@ int ha_calpont_impl_write_batch_row_(uchar *buf, TABLE* table, cal_impl_if::cal_ } case CalpontSystemCatalog::VARCHAR: { - if (nullVal && (ci.columnTypes[colpos].constraintType != CalpontSystemCatalog::NOTNULL_CONSTRAINT)) + size_t length; + if (ci.utf8) + length = (ci.columnTypes[colpos].colWidth * 3); + else + length = ci.columnTypes[colpos].colWidth; + + if (nullVal && (ci.columnTypes[colpos].constraintType != CalpontSystemCatalog::NOTNULL_CONSTRAINT)) { fprintf(ci.filePtr, "%c", ci.delimiter); if (!ci.utf8) @@ -901,6 +907,7 @@ int ha_calpont_impl_write_batch_row_(uchar *buf, TABLE* table, cal_impl_if::cal_ dataLength = *(uint16_t*) buf; buf = buf + 2 ; } + length = dataLength; escape.assign((char*)buf, dataLength); boost::replace_all(escape, "\\", "\\\\"); fprintf(ci.filePtr, "%c%.*s%c%c", ci.enclosed_by, (int)escape.length(), escape.c_str(), ci.enclosed_by, ci.delimiter); @@ -917,8 +924,7 @@ int ha_calpont_impl_write_batch_row_(uchar *buf, TABLE* table, cal_impl_if::cal_ dataLength = *(uint16_t*) buf; buf = buf + 2 ; } - if ( dataLength > ci.columnTypes[colpos].colWidth) - dataLength = ci.columnTypes[colpos].colWidth; + length = dataLength; escape.assign((char*)buf, dataLength); boost::replace_all(escape, "\\", "\\\\"); @@ -926,11 +932,7 @@ int ha_calpont_impl_write_batch_row_(uchar *buf, TABLE* table, cal_impl_if::cal_ fprintf(ci.filePtr, "%c%.*s%c%c", ci.enclosed_by, (int)escape.length(), escape.c_str(), ci.enclosed_by, ci.delimiter); } } - //buf += ci.columnTypes[colpos].colWidth; - if (ci.utf8) - buf += (ci.columnTypes[colpos].colWidth * 3); - else - buf += ci.columnTypes[colpos].colWidth; + buf += length; break; } diff --git a/writeengine/bulk/we_bulkloadbuffer.cpp b/writeengine/bulk/we_bulkloadbuffer.cpp index b1902e98f..7326f987b 100644 --- a/writeengine/bulk/we_bulkloadbuffer.cpp +++ b/writeengine/bulk/we_bulkloadbuffer.cpp @@ -524,13 +524,16 @@ void BulkLoadBuffer::convert(char *field, int fieldLength, } // Swap byte order before comparing character string - int64_t binChar = static_cast( uint64ToStr( - *(reinterpret_cast(charTmpBuf)) ) ); + // Compare must be unsigned + uint64_t compChar = uint64ToStr( *(reinterpret_cast(charTmpBuf)) ); + int64_t binChar = static_cast( compChar ); // Update min/max range - if (binChar < bufStats.minBufferVal) + uint64_t minVal = static_cast( bufStats.minBufferVal ); + uint64_t maxVal = static_cast( bufStats.maxBufferVal ); + if (compChar < minVal) bufStats.minBufferVal = binChar; - if (binChar > bufStats.maxBufferVal) + if (compChar > maxVal) bufStats.maxBufferVal = binChar; pVal = charTmpBuf;