1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-01 03:47:19 +03:00

MDEV-11371 - column compression

Storage engine independent support for column compression.

TINYBLOB, BLOB, MEDIUMBLOB, LONGBLOB, TINYTEXT, TEXT, MEDIUMTEXT, LONGTEXT,
VARCHAR and VARBINARY columns can be compressed.

New COMPRESSED column attribute added:
COMPRESSED[=<compression_method>]

System variables added:
column_compression_threshold
column_compression_zlib_level
column_compression_zlib_strategy
column_compression_zlib_wrap

Status variables added:
Column_compressions
Column_decompressions

Limitations:
- the only supported method currently is zlib
- the CSV storage engine stores data uncompressed on disk even if the
  COMPRESSED attribute is present
- it is not possible to create indexes over compressed columns.
This commit is contained in:
Sergey Vojtovich
2017-04-24 17:54:18 +04:00
parent dd4e9cdded
commit fdc4779235
32 changed files with 2648 additions and 151 deletions

View File

@ -128,6 +128,8 @@ max_display_length_for_field(enum_field_types sql_type, unsigned int metadata)
case MYSQL_TYPE_VAR_STRING:
case MYSQL_TYPE_VARCHAR:
return metadata;
case MYSQL_TYPE_VARCHAR_COMPRESSED:
return metadata - 1;
/*
The actual length for these types does not really matter since
@ -145,6 +147,7 @@ max_display_length_for_field(enum_field_types sql_type, unsigned int metadata)
return my_set_bits(3 * 8);
case MYSQL_TYPE_BLOB:
case MYSQL_TYPE_BLOB_COMPRESSED:
/*
For the blob type, Field::real_type() lies and says that all
blobs are of type MYSQL_TYPE_BLOB. In that case, we have to look
@ -294,6 +297,7 @@ uint32 table_def::calc_field_size(uint col, uchar *master_data) const
break;
}
case MYSQL_TYPE_VARCHAR:
case MYSQL_TYPE_VARCHAR_COMPRESSED:
{
length= m_field_metadata[col] > 255 ? 2 : 1; // c&p of Field_varstring::data_length()
length+= length == 1 ? (uint32) *master_data : uint2korr(master_data);
@ -303,6 +307,7 @@ uint32 table_def::calc_field_size(uint col, uchar *master_data) const
case MYSQL_TYPE_MEDIUM_BLOB:
case MYSQL_TYPE_LONG_BLOB:
case MYSQL_TYPE_BLOB:
case MYSQL_TYPE_BLOB_COMPRESSED:
case MYSQL_TYPE_GEOMETRY:
{
/*
@ -406,11 +411,14 @@ void show_sql_type(enum_field_types type, uint16 metadata, String *str, CHARSET_
case MYSQL_TYPE_VAR_STRING:
case MYSQL_TYPE_VARCHAR:
case MYSQL_TYPE_VARCHAR_COMPRESSED:
{
CHARSET_INFO *cs= str->charset();
uint32 length=
cs->cset->snprintf(cs, (char*) str->ptr(), str->alloced_length(),
"varchar(%u)", metadata);
"varchar(%u)%s", metadata,
type == MYSQL_TYPE_VARCHAR_COMPRESSED ? " compressed"
: "");
str->length(length);
}
break;
@ -455,6 +463,7 @@ void show_sql_type(enum_field_types type, uint16 metadata, String *str, CHARSET_
break;
case MYSQL_TYPE_BLOB:
case MYSQL_TYPE_BLOB_COMPRESSED:
/*
Field::real_type() lies regarding the actual type of a BLOB, so
it is necessary to check the pack length to figure out what kind
@ -482,6 +491,9 @@ void show_sql_type(enum_field_types type, uint16 metadata, String *str, CHARSET_
DBUG_ASSERT(0);
break;
}
if (type == MYSQL_TYPE_BLOB_COMPRESSED)
str->append(STRING_WITH_LEN(" compressed"));
break;
case MYSQL_TYPE_STRING:
@ -583,6 +595,7 @@ can_convert_field_to(Field *field,
int *order_var)
{
DBUG_ENTER("can_convert_field_to");
bool same_type;
#ifndef DBUG_OFF
char field_type_buf[MAX_FIELD_WIDTH];
String field_type(field_type_buf, sizeof(field_type_buf), &my_charset_latin1);
@ -590,11 +603,30 @@ can_convert_field_to(Field *field,
DBUG_PRINT("enter", ("field_type: %s, target_type: %d, source_type: %d, source_metadata: 0x%x",
field_type.c_ptr_safe(), field->real_type(), source_type, metadata));
#endif
/**
@todo
Implement Field_varstring_compressed::real_type() and
Field_blob_compressed::real_type() properly. All occurrences
of Field::real_type() have to be inspected and adjusted if needed.
Until that is done, we have to compare source_type against
binlog_type() when replicating from or to compressed data types.
@sa Comment for Field::binlog_type()
*/
if (source_type == MYSQL_TYPE_VARCHAR_COMPRESSED ||
source_type == MYSQL_TYPE_BLOB_COMPRESSED ||
field->binlog_type() == MYSQL_TYPE_VARCHAR_COMPRESSED ||
field->binlog_type() == MYSQL_TYPE_BLOB_COMPRESSED)
same_type= field->binlog_type() == source_type;
else
same_type= field->real_type() == source_type;
/*
If the real type is the same, we need to check the metadata to
decide if conversions are allowed.
*/
if (field->real_type() == source_type)
if (same_type)
{
if (metadata == 0) // Metadata can only be zero if no metadata was provided
{
@ -731,18 +763,22 @@ can_convert_field_to(Field *field,
case MYSQL_TYPE_MEDIUM_BLOB:
case MYSQL_TYPE_LONG_BLOB:
case MYSQL_TYPE_BLOB:
case MYSQL_TYPE_BLOB_COMPRESSED:
case MYSQL_TYPE_STRING:
case MYSQL_TYPE_VAR_STRING:
case MYSQL_TYPE_VARCHAR:
case MYSQL_TYPE_VARCHAR_COMPRESSED:
switch (field->real_type())
{
case MYSQL_TYPE_TINY_BLOB:
case MYSQL_TYPE_MEDIUM_BLOB:
case MYSQL_TYPE_LONG_BLOB:
case MYSQL_TYPE_BLOB:
case MYSQL_TYPE_BLOB_COMPRESSED:
case MYSQL_TYPE_STRING:
case MYSQL_TYPE_VAR_STRING:
case MYSQL_TYPE_VARCHAR:
case MYSQL_TYPE_VARCHAR_COMPRESSED:
*order_var= compare_lengths(field, source_type, metadata);
/*
Here we know that the types are different, so if the order
@ -1036,6 +1072,7 @@ table_def::table_def(unsigned char *types, ulong size,
switch (binlog_type(i)) {
case MYSQL_TYPE_TINY_BLOB:
case MYSQL_TYPE_BLOB:
case MYSQL_TYPE_BLOB_COMPRESSED:
case MYSQL_TYPE_MEDIUM_BLOB:
case MYSQL_TYPE_LONG_BLOB:
case MYSQL_TYPE_DOUBLE:
@ -1066,6 +1103,7 @@ table_def::table_def(unsigned char *types, ulong size,
break;
}
case MYSQL_TYPE_VARCHAR:
case MYSQL_TYPE_VARCHAR_COMPRESSED:
{
/*
These types store two bytes.