1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-5005 Add charset number to system catalog - Part 1.

This patch improves/fixes the existing handling of CHARSET and
COLLATION symbols in the ColumnStore DDL parser.

Also, add fCollate and fCharsetNum member variables to the
ddlpackage::ColumnType class.
This commit is contained in:
Gagan Goel
2023-07-28 17:56:09 -04:00
parent 1a49a09af3
commit a36ea6dbb4
5 changed files with 70 additions and 30 deletions

View File

@ -56,24 +56,42 @@ int ddllex(YYSTYPE* ddllval, void* yyscanner);
void ddlerror(struct pass_to_bison* x, char const *s); void ddlerror(struct pass_to_bison* x, char const *s);
char* copy_string(const char *str); char* copy_string(const char *str);
void fix_column_length(SchemaObject* elem, const CHARSET_INFO* def_cs) { void fix_column_length(SchemaObject* elem, const CHARSET_INFO* def_cs)
{
auto* column = dynamic_cast<ColumnDef*>(elem); auto* column = dynamic_cast<ColumnDef*>(elem);
if (column == NULL || column->fType == NULL) if (column == NULL || column->fType == NULL)
{ {
return; return;
} }
if (column->fType->fType == DDL_VARCHAR || if (column->fType->fType == DDL_BLOB ||
column->fType->fType == DDL_CHAR || column->fType->fType == DDL_VARBINARY)
(column->fType->fType == DDL_TEXT && column->fType->fExplicitLength))
{ {
unsigned mul = def_cs ? def_cs->mbmaxlen : 1; CHARSET_INFO* cs = &my_charset_bin;
if (column->fType->fCharset) { column->fType->fCharset = cs->cs_name.str;
const CHARSET_INFO* cs = get_charset_by_csname(column->fType->fCharset, MY_CS_PRIMARY, MYF(0)); column->fType->fCollate = cs->coll_name.str;
if (cs) column->fType->fCharsetNum = cs->number;
mul = cs->mbmaxlen; return;
} }
column->fType->fLength *= mul;
if (column->fType->fType == DDL_VARCHAR ||
column->fType->fType == DDL_CHAR ||
column->fType->fType == DDL_TEXT)
{
CHARSET_INFO* cs = def_cs ? def_cs : &my_charset_latin1;
if (column->fType->fCollate)
cs = get_charset_by_name(column->fType->fCollate, MYF(0));
else if (column->fType->fCharset)
cs = get_charset_by_csname(column->fType->fCharset, MY_CS_PRIMARY, MYF(0));
column->fType->fCharset = cs->cs_name.str;
column->fType->fCollate = cs->coll_name.str;
column->fType->fCharsetNum = cs->number;
if ((column->fType->fType != DDL_TEXT) || column->fType->fExplicitLength)
column->fType->fLength *= cs->mbmaxlen;
} }
if (column->fType->fType == DDL_TEXT && column->fType->fExplicitLength) if (column->fType->fType == DDL_TEXT && column->fType->fExplicitLength)
@ -236,6 +254,7 @@ ZEROFILL
%type <str> ident %type <str> ident
%type <str> opt_quoted_literal %type <str> opt_quoted_literal
%type <str> opt_column_charset %type <str> opt_column_charset
%type <str> opt_column_collate
%% %%
stmtblock: stmtmulti { x->fParseTree = $1; } stmtblock: stmtmulti { x->fParseTree = $1; }
; ;
@ -500,9 +519,20 @@ table_options:
; ;
opt_equal: opt_equal:
{} | '=' {} /* empty */ {}
| '=' {}
; ;
opt_default:
/* empty */ {}
| DEFAULT {}
;
charset:
IDB_CHAR SET {}
| CHARSET {}
;
table_option: table_option:
ENGINE opt_equal ident {$$ = new pair<string,string>("engine", $3);} ENGINE opt_equal ident {$$ = new pair<string,string>("engine", $3);}
| |
@ -515,19 +545,13 @@ table_option:
COMMENT string_literal {$$ = new pair<string,string>("comment", $2);} COMMENT string_literal {$$ = new pair<string,string>("comment", $2);}
| |
AUTO_INCREMENT opt_equal ICONST AUTO_INCREMENT opt_equal ICONST
{ {
$$ = new pair<string,string>("auto_increment", $3); $$ = new pair<string,string>("auto_increment", $3);
} }
| |
DEFAULT CHARSET opt_equal ident {$$ = new pair<string,string>("default charset", $4);} opt_default charset opt_equal opt_quoted_literal {$$ = new pair<string,string>("default charset", $4);}
|
CHARSET opt_equal ident {$$ = new pair<string, string>("default charset", $3);}
| |
DEFAULT IDB_CHAR SET opt_equal ident {$$ = new pair<string,string>("default charset", $5);} opt_default COLLATE opt_equal opt_quoted_literal {$$ = new pair<string, string>("default collate", $4);}
|
DEFAULT COLLATE opt_equal opt_quoted_literal {$$ = new pair<string, string>("default collate", $4);}
|
COLLATE opt_equal opt_quoted_literal {$$ = new pair<string, string>("default collate", $3);}
; ;
alter_table_statement: alter_table_statement:
@ -780,18 +804,19 @@ optional_braces:
opt_column_charset: opt_column_charset:
/* empty */ { $$ = NULL; } /* empty */ { $$ = NULL; }
| |
IDB_CHAR SET opt_quoted_literal { $$ = $3; } charset opt_quoted_literal { $$ = $2; }
; ;
opt_column_collate: opt_column_collate:
/* empty */ {} /* empty */ { $$ = NULL; }
| |
COLLATE opt_quoted_literal {} COLLATE opt_quoted_literal { $$ = $2; }
; ;
data_type: data_type:
character_string_type opt_column_charset opt_column_collate { character_string_type opt_column_charset opt_column_collate {
$1->fCharset = $2; $1->fCharset = $2;
$1->fCollate = $3;
$$ = $1; $$ = $1;
} }
| binary_string_type | binary_string_type
@ -800,6 +825,7 @@ data_type:
| blob_type | blob_type
| text_type opt_column_charset opt_column_collate { | text_type opt_column_charset opt_column_collate {
$1->fCharset = $2; $1->fCharset = $2;
$1->fCollate = $3;
$$ = $1; $$ = $1;
} }
| IDB_BLOB | IDB_BLOB

View File

@ -64,13 +64,16 @@ ColumnType::ColumnType(int prec, int scale)
, fScale(scale) , fScale(scale)
, fWithTimezone(false) , fWithTimezone(false)
, fCharset(NULL) , fCharset(NULL)
, fCollate(NULL)
, fCharsetNum(0)
, fExplicitLength(false) , fExplicitLength(false)
{ {
fLength = utils::widthByPrecision(fPrecision); fLength = utils::widthByPrecision(fPrecision);
} }
ColumnType::ColumnType(int type) ColumnType::ColumnType(int type)
: fType(type), fLength(0), fScale(0), fWithTimezone(false), fCharset(NULL), fExplicitLength(false) : fType(type), fLength(0), fScale(0), fWithTimezone(false),
fCharset(NULL), fCollate(NULL), fCharsetNum(0), fExplicitLength(false)
{ {
switch (type) switch (type)
{ {

View File

@ -934,7 +934,7 @@ struct ColumnType
EXPORT int serialize(messageqcpp::ByteStream& bs); EXPORT int serialize(messageqcpp::ByteStream& bs);
/** @brief For deserialization. */ /** @brief For deserialization. */
ColumnType() : fCharset(NULL), fExplicitLength(false) ColumnType() : fCharset(NULL), fCollate(NULL), fCharsetNum(0), fExplicitLength(false)
{ {
} }
@ -978,6 +978,10 @@ struct ColumnType
/** @brief Column charset (CHAR, VARCHAR and TEXT only) */ /** @brief Column charset (CHAR, VARCHAR and TEXT only) */
const char* fCharset; const char* fCharset;
/** @brief Column collation (CHAR, VARCHAR and TEXT only) */
const char* fCollate;
/** @brief Column charset number (CHAR, VARCHAR and TEXT only) */
uint32_t fCharsetNum;
/** @brief Whether the TEXT column has an explicitly defined length, i.e. TEXT(1717) */ /** @brief Whether the TEXT column has an explicitly defined length, i.e. TEXT(1717) */
bool fExplicitLength; bool fExplicitLength;

View File

@ -1109,6 +1109,7 @@ int ColumnType::unserialize(ByteStream& bytestream)
messageqcpp::ByteStream::quadbyte compressiontype; messageqcpp::ByteStream::quadbyte compressiontype;
std::string autoincrement; std::string autoincrement;
messageqcpp::ByteStream::octbyte nextVal; messageqcpp::ByteStream::octbyte nextVal;
messageqcpp::ByteStream::quadbyte charsetNum;
// read column types // read column types
bytestream >> ftype; bytestream >> ftype;
@ -1119,6 +1120,7 @@ int ColumnType::unserialize(ByteStream& bytestream)
bytestream >> compressiontype; bytestream >> compressiontype;
bytestream >> autoincrement; bytestream >> autoincrement;
bytestream >> nextVal; bytestream >> nextVal;
bytestream >> charsetNum;
fType = ftype; fType = ftype;
fLength = length; fLength = length;
@ -1128,6 +1130,7 @@ int ColumnType::unserialize(ByteStream& bytestream)
fCompressiontype = compressiontype; fCompressiontype = compressiontype;
fAutoincrement = autoincrement; fAutoincrement = autoincrement;
fNextvalue = nextVal; fNextvalue = nextVal;
fCharsetNum = charsetNum;
// cout << "BS length = " << bytestream.length() << endl; // cout << "BS length = " << bytestream.length() << endl;
@ -1147,6 +1150,7 @@ int ColumnType::serialize(ByteStream& bytestream)
messageqcpp::ByteStream::quadbyte compressiontype = fCompressiontype; messageqcpp::ByteStream::quadbyte compressiontype = fCompressiontype;
std::string autoincrement = fAutoincrement; std::string autoincrement = fAutoincrement;
messageqcpp::ByteStream::octbyte nextVal = fNextvalue; messageqcpp::ByteStream::octbyte nextVal = fNextvalue;
messageqcpp::ByteStream::quadbyte charsetNum = fCharsetNum;
// write column types // write column types
bytestream << ftype; bytestream << ftype;
@ -1157,6 +1161,7 @@ int ColumnType::serialize(ByteStream& bytestream)
bytestream << compressiontype; bytestream << compressiontype;
bytestream << autoincrement; bytestream << autoincrement;
bytestream << nextVal; bytestream << nextVal;
bytestream << charsetNum;
// cout << "BS length = " << bytestream.length() << endl; // cout << "BS length = " << bytestream.length() << endl;

View File

@ -2515,7 +2515,8 @@ int ha_mcs_impl_create_(const char* name, TABLE* table_arg, HA_CREATE_INFO* crea
const CHARSET_INFO* field_cs = (*field)->charset(); const CHARSET_INFO* field_cs = (*field)->charset();
if (field_cs && (!share->table_charset || field_cs->number != share->table_charset->number)) if (field_cs && (!share->table_charset || field_cs->number != share->table_charset->number))
{ {
oss << " CHARACTER SET " << field_cs->cs_name.str; oss << " CHARACTER SET " << field_cs->cs_name.str <<
" COLLATE " << field_cs->coll_name.str;
} }
} }
@ -2555,7 +2556,8 @@ int ha_mcs_impl_create_(const char* name, TABLE* table_arg, HA_CREATE_INFO* crea
if (share->table_charset) if (share->table_charset)
{ {
oss << " DEFAULT CHARSET=" << share->table_charset->cs_name.str; oss << " DEFAULT CHARSET=" << share->table_charset->cs_name.str <<
" COLLATE=" << share->table_charset->coll_name.str;
} }
// Process table level options such as MIN_ROWS, MAX_ROWS, COMMENT // Process table level options such as MIN_ROWS, MAX_ROWS, COMMENT