1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-4320/4364/4370 Fix multibyte processing for LDI/Insert...Select

For CHAR/VARCHAR/TEXT fields, the buffer size of a field represents
the field size in bytes, which can be bigger than the field size in
number of characters, for multi-byte character sets such as utf8,
utf8mb4 etc. The buffer also contains a length prefix that holds the
byte length of the value, which can be up to 65532 bytes for a VARCHAR
field, and much higher for a TEXT field (the maximum byte length we
process for a TEXT field is the largest value that fits in 4 bytes,
i.e. 2^32 - 1 bytes, or about 4GB!).

There is also special processing for a TEXT field defined with a default
length like so:
  CREATE TABLE cs1 (a TEXT CHARACTER SET utf8)
Here, the byte length is a fixed 65535, irrespective of the character
set used. This is different from a case such as:
  CREATE TABLE cs1 (a TEXT(65535) CHARACTER SET utf8)
where the byte length for the field will be 65535*3, because utf8 uses
up to 3 bytes per character.
This commit is contained in:
Gagan Goel
2020-10-26 17:51:24 +00:00
parent 26131a1b43
commit 13264feb7d
3 changed files with 80 additions and 141 deletions

View File

@ -3208,7 +3208,6 @@ void ha_mcs_impl_start_bulk_insert(ha_rows rows, TABLE* table, bool is_cache_ins
tableName.schema = table->s->db.str;
tableName.table = table->s->table_name.str;
ci->useXbit = false;
ci->utf8 = false;
CalpontSystemCatalog::RIDList colrids;
try
@ -3253,11 +3252,6 @@ void ha_mcs_impl_start_bulk_insert(ha_rows rows, TABLE* table, bool is_cache_ins
else
ci->headerLength = (1 + colrids.size() + 7 - numberNotNull) / 8;
if ((strncmp(table->s->table_charset->comment, "UTF-8", 5) == 0) || (strncmp(table->s->table_charset->comment, "utf-8", 5) == 0))
{
ci->utf8 = true;
}
//Log the statement to debug.log
{
ostringstream oss;