You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
MCOL-5573 Fix cpimport truncation of TEXT columns.
1. Restore the utf8_truncate_point() function in utils/common/utils_utf8.h that I removed as part of the patch for MCOL-4931. 2. As per the definition of TEXT columns, the default column width represents the maximum number of bytes that can be stored in the TEXT column, so the effective maximum length in characters is smaller if the value contains multi-byte characters. However, if the user explicitly specifies the length of the TEXT column in a table DDL, such as TEXT(65535), then the DDL logic ensures that enough bytes are allocated (up to a system maximum) to hold up to that many characters (multi-byte characters if the charset for the column is multi-byte, such as utf8mb3).
This commit is contained in:
@ -91,6 +91,33 @@ inline std::string wstring_to_utf8(const std::wstring& str)
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Returns the number of trailing bytes (0..3) of the buffer `input[0..length)`
// that form the start of a multi-byte UTF-8 sequence which was cut off at the
// end of the buffer. Dropping that many bytes from the end makes the buffer
// end on a character boundary.
//
// Only the last three bytes are inspected, and only their high bits; this is
// not a UTF-8 validator. A return of 0 means the buffer already ends with a
// complete character (or the tail does not look like a cut sequence).
//
// Buffers shorter than three bytes are handled too: each look-back is guarded
// by the available length, so e.g. a lone lead byte yields 1. The result never
// exceeds `length`.
inline uint8_t utf8_truncate_point(const char* input, size_t length)
{
  if (input == nullptr || length == 0)
  {
    return 0;
  }

  const unsigned char* end = reinterpret_cast<const unsigned char*>(input) + length;
  const unsigned char last = end[-1];

  // Plain ASCII in the final position: nothing is cut.
  if ((last & 0x80) == 0)
  {
    return 0;
  }

  // 11xxxxxx: the final byte is the lead byte of a new multi-byte sequence,
  // so the character has only its first byte present.
  if (last & 0x40)
  {
    return 1;
  }

  // The final byte is a continuation byte (10xxxxxx). If the byte before it
  // is the lead of a 3- or 4-byte sequence, only two of its bytes are present.
  if (length >= 2 && (end[-2] & 0xe0) == 0xe0)
  {
    return 2;
  }

  // If the byte two positions back is the lead of a 4-byte sequence, only
  // three of its four bytes are present.
  if (length >= 3 && (end[-3] & 0xf0) == 0xf0)
  {
    return 3;
  }

  return 0;
}
|
||||
|
||||
// Declaration only; the definition is not visible in this view.
// NOTE(review): presumably compares the two strings under the collation
// selected by charsetNumber with a strcoll-style result, and treats str1/str2
// as NUL-terminated since no lengths are passed - confirm against the definition.
int mcs_strcoll(const char* str1, const char* str2, const uint32_t charsetNumber);
|
||||
// Overload taking explicit lengths (l1 for str1, l2 for str2) alongside the
// charset number. Declaration only; the definition is not visible in this view.
// NOTE(review): presumably a length-bounded collation compare with a
// strcoll-style result - confirm against the definition.
int mcs_strcoll(const char* str1, const uint32_t l1, const char* str2, const uint32_t l2,
|
||||
const uint32_t charsetNumber);
|
||||
|
Reference in New Issue
Block a user