1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

fix(mcol-4499): Correct handling of LIKE/NOT LIKE NULL

This commit is contained in:
Alexander Presnyakov
2024-07-10 11:55:23 +00:00
committed by Leonid Fedorov
parent 7bec46e685
commit 5f9ccfa8a1
6 changed files with 105 additions and 8 deletions

View File

@ -0,0 +1,13 @@
DROP DATABASE IF EXISTS `doubles_and_nulls`;
CREATE DATABASE `doubles_and_nulls`;
USE `doubles_and_nulls`;
DROP TABLE IF EXISTS qatablefloat;
CREATE TABLE qatablefloat (col float) engine=columnstore;
INSERT INTO qatablefloat VALUES (null);
INSERT INTO qatablefloat VALUES (null);
INSERT INTO qatablefloat VALUES (null);
DELETE FROM qatablefloat WHERE col IS NULL;
SELECT * FROM qatablefloat;
col
DROP TABLE qatablefloat;
DROP DATABASE `doubles_and_nulls`;

View File

@ -0,0 +1,19 @@
#
# Checks that DELETE ... WHERE col IS NULL removes NULL rows from a
# ColumnStore FLOAT column: the final SELECT is expected to return no rows.
#
-- source ../include/have_columnstore.inc
# Keep warnings produced by the IF EXISTS cleanup statements out of the output.
--disable_warnings
DROP DATABASE IF EXISTS `doubles_and_nulls`;
CREATE DATABASE `doubles_and_nulls`;
USE `doubles_and_nulls`;
DROP TABLE IF EXISTS qatablefloat;
--enable_warnings
CREATE TABLE qatablefloat (col float) engine=columnstore;
# Populate the table with NULL values only.
INSERT INTO qatablefloat VALUES (null);
INSERT INTO qatablefloat VALUES (null);
INSERT INTO qatablefloat VALUES (null);
# Every row is NULL, so this delete should leave the table empty.
DELETE FROM qatablefloat WHERE col IS NULL;
SELECT * FROM qatablefloat;
# Clean up.
DROP TABLE qatablefloat;
DROP DATABASE `doubles_and_nulls`;

View File

@ -0,0 +1,20 @@
DROP DATABASE IF EXISTS `mcol_4499`;
CREATE DATABASE `mcol_4499`;
USE `mcol_4499`;
DROP TABLE IF EXISTS twidevarchar;
DROP TABLE IF EXISTS twidechar;
CREATE TABLE twidevarchar (col VARCHAR(4)) ENGINE=ColumnStore;
INSERT INTO twidevarchar VALUES (NULL),('a');
SELECT col FROM twidevarchar WHERE col LIKE NULL;
col
SELECT col FROM twidevarchar WHERE col NOT LIKE NULL;
col
DROP TABLE twidevarchar;
CREATE TABLE twidechar (col CHAR(5)) ENGINE=ColumnStore;
INSERT INTO twidechar VALUES (NULL),('a');
SELECT col FROM twidechar WHERE col LIKE NULL;
col
SELECT col FROM twidechar WHERE col NOT LIKE NULL;
col
DROP TABLE twidechar;
DROP DATABASE `mcol_4499`;

View File

@ -0,0 +1,27 @@
#
# MCOL-4499 LIKE NULL and NOT LIKE NULL must not return any rows
#
# Each table holds one NULL row and one non-NULL row ('a'); both predicates
# are run against a VARCHAR column and against a CHAR column.
#
-- source ../include/have_columnstore.inc
# Keep warnings produced by the IF EXISTS cleanup statements out of the output.
--disable_warnings
DROP DATABASE IF EXISTS `mcol_4499`;
CREATE DATABASE `mcol_4499`;
USE `mcol_4499`;
DROP TABLE IF EXISTS twidevarchar;
DROP TABLE IF EXISTS twidechar;
--enable_warnings
# VARCHAR(4) column
CREATE TABLE twidevarchar (col VARCHAR(4)) ENGINE=ColumnStore;
INSERT INTO twidevarchar VALUES (NULL),('a');
# Neither query should return a row, for the NULL value or for 'a'.
SELECT col FROM twidevarchar WHERE col LIKE NULL;
SELECT col FROM twidevarchar WHERE col NOT LIKE NULL;
DROP TABLE twidevarchar;
# CHAR(5) column
CREATE TABLE twidechar (col CHAR(5)) ENGINE=ColumnStore;
INSERT INTO twidechar VALUES (NULL),('a');
# Neither query should return a row, for the NULL value or for 'a'.
SELECT col FROM twidechar WHERE col LIKE NULL;
SELECT col FROM twidechar WHERE col NOT LIKE NULL;
DROP TABLE twidechar;
# Clean up.
DROP DATABASE `mcol_4499`;

View File

@ -1754,6 +1754,14 @@ void filterColumnData(NewColRequestHeader* in, ColResultHeader* out, uint16_t* r
if (parsedColumnFilter.get() == nullptr && filterCount > 0) if (parsedColumnFilter.get() == nullptr && filterCount > 0)
parsedColumnFilter = _parseColumnFilter<T>(in->getFilterStringPtr(), dataType, filterCount, in->BOP); parsedColumnFilter = _parseColumnFilter<T>(in->getFilterStringPtr(), dataType, filterCount, in->BOP);
// If the filter is always false, return an empty result
// NOTE: parsedColumnFilter can still be nullptr here when filterCount == 0 (nothing is parsed in that case), hence the null check.
if (parsedColumnFilter.get() != nullptr && parsedColumnFilter->columnFilterMode == ALWAYS_FALSE)
{
out->NVALS = 0;
return;
}
// Cache parsedColumnFilter fields in local vars // Cache parsedColumnFilter fields in local vars
auto columnFilterMode = filterCount == 0 ? ALWAYS_TRUE : parsedColumnFilter->columnFilterMode; auto columnFilterMode = filterCount == 0 ? ALWAYS_TRUE : parsedColumnFilter->columnFilterMode;
FT* filterValues = filterCount == 0 ? nullptr : parsedColumnFilter->getFilterVals<FT>(); FT* filterValues = filterCount == 0 ? nullptr : parsedColumnFilter->getFilterVals<FT>();

View File

@ -28,6 +28,7 @@
#include <stdexcept> #include <stdexcept>
#include <vector> #include <vector>
#include <tr1/unordered_set> #include <tr1/unordered_set>
#include "joblisttypes.h"
#define POSIX_REGEX #define POSIX_REGEX
@ -68,6 +69,7 @@ enum ColumnFilterMode
// COMPARE_EQ) // COMPARE_EQ)
NONE_OF_VALUES_IN_ARRAY, // NONE of the values in the small set represented by an array (BOP_AND + all NONE_OF_VALUES_IN_ARRAY, // NONE of the values in the small set represented by an array (BOP_AND + all
// COMPARE_NE) // COMPARE_NE)
ALWAYS_FALSE, // comparison is always false
}; };
// TBD Test if avalanche makes lookups in the hash maps based on these hashers faster. // TBD Test if avalanche makes lookups in the hash maps based on these hashers faster.
@ -482,16 +484,18 @@ T getNullValue(uint8_t type)
case execplan::CalpontSystemCatalog::DOUBLE: case execplan::CalpontSystemCatalog::DOUBLE:
case execplan::CalpontSystemCatalog::UDOUBLE: return joblist::DOUBLENULL; case execplan::CalpontSystemCatalog::UDOUBLE: return joblist::DOUBLENULL;
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::VARCHAR:
case execplan::CalpontSystemCatalog::DATE: case execplan::CalpontSystemCatalog::DATE:
case execplan::CalpontSystemCatalog::DATETIME: case execplan::CalpontSystemCatalog::DATETIME:
case execplan::CalpontSystemCatalog::TIMESTAMP: case execplan::CalpontSystemCatalog::TIMESTAMP:
case execplan::CalpontSystemCatalog::TIME: case execplan::CalpontSystemCatalog::TIME:
case execplan::CalpontSystemCatalog::VARBINARY: case execplan::CalpontSystemCatalog::VARBINARY:
case execplan::CalpontSystemCatalog::BLOB: case execplan::CalpontSystemCatalog::BLOB:
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::TEXT: return joblist::CHAR8NULL; case execplan::CalpontSystemCatalog::TEXT: return joblist::CHAR8NULL;
// VARCHARs with width >= 8 are stored as dictionaries (used TypeHandlerVarchar::getNullValueForType as a reference)
case execplan::CalpontSystemCatalog::VARCHAR: return joblist::UBIGINTNULL;
case execplan::CalpontSystemCatalog::UBIGINT: return joblist::UBIGINTNULL; case execplan::CalpontSystemCatalog::UBIGINT: return joblist::UBIGINTNULL;
default: return joblist::BIGINTNULL; default: return joblist::BIGINTNULL;
@ -578,7 +582,6 @@ boost::shared_ptr<ParsedColumnFilter> _parseColumnFilter(
using UT = typename std::conditional<std::is_unsigned<T>::value || datatypes::is_uint128_t<T>::value, T, using UT = typename std::conditional<std::is_unsigned<T>::value || datatypes::is_uint128_t<T>::value, T,
typename datatypes::make_unsigned<T>::type>::type; typename datatypes::make_unsigned<T>::type>::type;
const uint32_t WIDTH = sizeof(T); // Sizeof of the column to be filtered const uint32_t WIDTH = sizeof(T); // Sizeof of the column to be filtered
boost::shared_ptr<ParsedColumnFilter> ret; // Place for building the value to return boost::shared_ptr<ParsedColumnFilter> ret; // Place for building the value to return
if (filterCount == 0) if (filterCount == 0)
return ret; return ret;
@ -612,28 +615,35 @@ boost::shared_ptr<ParsedColumnFilter> _parseColumnFilter(
ret->prestored_rfs[argIndex] = args->rf; ret->prestored_rfs[argIndex] = args->rf;
auto colDataType = (execplan::CalpontSystemCatalog::ColDataType)colType; auto colDataType = (execplan::CalpontSystemCatalog::ColDataType)colType;
bool isNullEqCmp = false; bool isFilterValueNull = false;
if (datatypes::isUnsigned(colDataType)) if (datatypes::isUnsigned(colDataType))
{ {
const auto nullValue = getNullValue<UT>(colDataType); const auto nullValue = getNullValue<UT>(colDataType);
const UT* filterValue = reinterpret_cast<const UT*>(args->val); const UT* filterValue = reinterpret_cast<const UT*>(args->val);
isNullEqCmp = isFilterValueNull = memcmp(filterValue, &nullValue, sizeof(nullValue)) == 0;
(args->COP == COMPARE_EQ && memcmp(filterValue, &nullValue, sizeof(nullValue)) == 0) ? true : false;
ret->storeFilterArg(argIndex, filterValue); ret->storeFilterArg(argIndex, filterValue);
} }
else else
{ {
const auto nullValue = getNullValue<T>(colDataType); const auto nullValue = getNullValue<T>(colDataType);
const T* filterValue = reinterpret_cast<const T*>(args->val); const T* filterValue = reinterpret_cast<const T*>(args->val);
isNullEqCmp = isFilterValueNull = memcmp(filterValue, &nullValue, sizeof(nullValue)) == 0;
(args->COP == COMPARE_EQ && memcmp(filterValue, &nullValue, sizeof(nullValue)) == 0) ? true : false;
ret->storeFilterArg(argIndex, filterValue); ret->storeFilterArg(argIndex, filterValue);
} }
// Check if the filter is [NOT] LIKE NULL -- such filters don't match any values
if ((args->COP == COMPARE_LIKE || args->COP == COMPARE_NLIKE) && isFilterValueNull)
{
ret->columnFilterMode = ALWAYS_FALSE;
goto skipConversion;
}
// IS NULL filtering expression is translated into COMPARE_EQ + NULL magic in the filter. // IS NULL filtering expression is translated into COMPARE_EQ + NULL magic in the filter.
// This if replaces an operation id once to avoid additional branching in the main loop // This if replaces an operation id once to avoid additional branching in the main loop
// of vectorizedFiltering_ in column.cpp. // of vectorizedFiltering_ in column.cpp.
// It would be cleaner to place it into EM though. // It would be cleaner to place it into EM though.
bool isNullEqCmp = (isFilterValueNull && args->COP == COMPARE_EQ);
ret->prestored_cops[argIndex] = (isNullEqCmp) ? COMPARE_NULLEQ : args->COP; ret->prestored_cops[argIndex] = (isNullEqCmp) ? COMPARE_NULLEQ : args->COP;
} }