1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-5005 Add charset number to system catalog - Part 1.

This patch improves/fixes the existing handling of CHARSET and
COLLATION symbols in the ColumnStore DDL parser.

It also adds the fCollate and fCharsetNum member variables to the
ddlpackage::ColumnType class.
This commit is contained in:
Gagan Goel
2023-07-28 17:56:09 -04:00
parent 1a49a09af3
commit a36ea6dbb4
5 changed files with 70 additions and 30 deletions

View File

@ -56,24 +56,42 @@ int ddllex(YYSTYPE* ddllval, void* yyscanner);
void ddlerror(struct pass_to_bison* x, char const *s);
char* copy_string(const char *str);
void fix_column_length(SchemaObject* elem, const CHARSET_INFO* def_cs) {
void fix_column_length(SchemaObject* elem, const CHARSET_INFO* def_cs)
{
auto* column = dynamic_cast<ColumnDef*>(elem);
if (column == NULL || column->fType == NULL)
{
return;
}
if (column->fType->fType == DDL_VARCHAR ||
column->fType->fType == DDL_CHAR ||
(column->fType->fType == DDL_TEXT && column->fType->fExplicitLength))
if (column->fType->fType == DDL_BLOB ||
column->fType->fType == DDL_VARBINARY)
{
unsigned mul = def_cs ? def_cs->mbmaxlen : 1;
if (column->fType->fCharset) {
const CHARSET_INFO* cs = get_charset_by_csname(column->fType->fCharset, MY_CS_PRIMARY, MYF(0));
if (cs)
mul = cs->mbmaxlen;
}
column->fType->fLength *= mul;
CHARSET_INFO* cs = &my_charset_bin;
column->fType->fCharset = cs->cs_name.str;
column->fType->fCollate = cs->coll_name.str;
column->fType->fCharsetNum = cs->number;
return;
}
if (column->fType->fType == DDL_VARCHAR ||
column->fType->fType == DDL_CHAR ||
column->fType->fType == DDL_TEXT)
{
CHARSET_INFO* cs = def_cs ? def_cs : &my_charset_latin1;
if (column->fType->fCollate)
cs = get_charset_by_name(column->fType->fCollate, MYF(0));
else if (column->fType->fCharset)
cs = get_charset_by_csname(column->fType->fCharset, MY_CS_PRIMARY, MYF(0));
column->fType->fCharset = cs->cs_name.str;
column->fType->fCollate = cs->coll_name.str;
column->fType->fCharsetNum = cs->number;
if ((column->fType->fType != DDL_TEXT) || column->fType->fExplicitLength)
column->fType->fLength *= cs->mbmaxlen;
}
if (column->fType->fType == DDL_TEXT && column->fType->fExplicitLength)
@ -236,6 +254,7 @@ ZEROFILL
%type <str> ident
%type <str> opt_quoted_literal
%type <str> opt_column_charset
%type <str> opt_column_collate
%%
stmtblock: stmtmulti { x->fParseTree = $1; }
;
@ -500,9 +519,20 @@ table_options:
;
opt_equal:
{} | '=' {}
/* empty */ {}
| '=' {}
;
opt_default:
/* empty */ {}
| DEFAULT {}
;
charset:
IDB_CHAR SET {}
| CHARSET {}
;
table_option:
ENGINE opt_equal ident {$$ = new pair<string,string>("engine", $3);}
|
@ -515,19 +545,13 @@ table_option:
COMMENT string_literal {$$ = new pair<string,string>("comment", $2);}
|
AUTO_INCREMENT opt_equal ICONST
{
$$ = new pair<string,string>("auto_increment", $3);
}
{
$$ = new pair<string,string>("auto_increment", $3);
}
|
DEFAULT CHARSET opt_equal ident {$$ = new pair<string,string>("default charset", $4);}
|
CHARSET opt_equal ident {$$ = new pair<string, string>("default charset", $3);}
opt_default charset opt_equal opt_quoted_literal {$$ = new pair<string,string>("default charset", $4);}
|
DEFAULT IDB_CHAR SET opt_equal ident {$$ = new pair<string,string>("default charset", $5);}
|
DEFAULT COLLATE opt_equal opt_quoted_literal {$$ = new pair<string, string>("default collate", $4);}
|
COLLATE opt_equal opt_quoted_literal {$$ = new pair<string, string>("default collate", $3);}
opt_default COLLATE opt_equal opt_quoted_literal {$$ = new pair<string, string>("default collate", $4);}
;
alter_table_statement:
@ -780,18 +804,19 @@ optional_braces:
opt_column_charset:
/* empty */ { $$ = NULL; }
|
IDB_CHAR SET opt_quoted_literal { $$ = $3; }
charset opt_quoted_literal { $$ = $2; }
;
opt_column_collate:
/* empty */ {}
/* empty */ { $$ = NULL; }
|
COLLATE opt_quoted_literal {}
COLLATE opt_quoted_literal { $$ = $2; }
;
data_type:
character_string_type opt_column_charset opt_column_collate {
$1->fCharset = $2;
$1->fCollate = $3;
$$ = $1;
}
| binary_string_type
@ -800,6 +825,7 @@ data_type:
| blob_type
| text_type opt_column_charset opt_column_collate {
$1->fCharset = $2;
$1->fCollate = $3;
$$ = $1;
}
| IDB_BLOB

View File

@ -64,13 +64,16 @@ ColumnType::ColumnType(int prec, int scale)
, fScale(scale)
, fWithTimezone(false)
, fCharset(NULL)
, fCollate(NULL)
, fCharsetNum(0)
, fExplicitLength(false)
{
fLength = utils::widthByPrecision(fPrecision);
}
ColumnType::ColumnType(int type)
: fType(type), fLength(0), fScale(0), fWithTimezone(false), fCharset(NULL), fExplicitLength(false)
: fType(type), fLength(0), fScale(0), fWithTimezone(false),
fCharset(NULL), fCollate(NULL), fCharsetNum(0), fExplicitLength(false)
{
switch (type)
{

View File

@ -934,7 +934,7 @@ struct ColumnType
EXPORT int serialize(messageqcpp::ByteStream& bs);
/** @brief For deserialization. */
ColumnType() : fCharset(NULL), fExplicitLength(false)
ColumnType() : fCharset(NULL), fCollate(NULL), fCharsetNum(0), fExplicitLength(false)
{
}
@ -978,6 +978,10 @@ struct ColumnType
/** @brief Column charset name (CHAR, VARCHAR and TEXT; the binary charset for BLOB and VARBINARY) */
const char* fCharset;
/** @brief Column collation name (CHAR, VARCHAR and TEXT; the binary collation for BLOB and VARBINARY) */
const char* fCollate;
/** @brief Column charset number (CHAR, VARCHAR and TEXT; the binary charset number for BLOB and VARBINARY) */
uint32_t fCharsetNum;
/** @brief Whether the TEXT column has an explicitly defined length, e.g. TEXT(1717) */
bool fExplicitLength;

View File

@ -1109,6 +1109,7 @@ int ColumnType::unserialize(ByteStream& bytestream)
messageqcpp::ByteStream::quadbyte compressiontype;
std::string autoincrement;
messageqcpp::ByteStream::octbyte nextVal;
messageqcpp::ByteStream::quadbyte charsetNum;
// read column types
bytestream >> ftype;
@ -1119,6 +1120,7 @@ int ColumnType::unserialize(ByteStream& bytestream)
bytestream >> compressiontype;
bytestream >> autoincrement;
bytestream >> nextVal;
bytestream >> charsetNum;
fType = ftype;
fLength = length;
@ -1128,6 +1130,7 @@ int ColumnType::unserialize(ByteStream& bytestream)
fCompressiontype = compressiontype;
fAutoincrement = autoincrement;
fNextvalue = nextVal;
fCharsetNum = charsetNum;
// cout << "BS length = " << bytestream.length() << endl;
@ -1147,6 +1150,7 @@ int ColumnType::serialize(ByteStream& bytestream)
messageqcpp::ByteStream::quadbyte compressiontype = fCompressiontype;
std::string autoincrement = fAutoincrement;
messageqcpp::ByteStream::octbyte nextVal = fNextvalue;
messageqcpp::ByteStream::quadbyte charsetNum = fCharsetNum;
// write column types
bytestream << ftype;
@ -1157,6 +1161,7 @@ int ColumnType::serialize(ByteStream& bytestream)
bytestream << compressiontype;
bytestream << autoincrement;
bytestream << nextVal;
bytestream << charsetNum;
// cout << "BS length = " << bytestream.length() << endl;