1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00

fix(mcol-4499): Correct handling of LIKE/NOT LIKE NULL

This commit is contained in:
Alexander Presnyakov 2024-07-10 11:55:23 +00:00 committed by Leonid Fedorov
parent 7bec46e685
commit 5f9ccfa8a1
6 changed files with 105 additions and 8 deletions

View File

@ -0,0 +1,13 @@
DROP DATABASE IF EXISTS `doubles_and_nulls`;
CREATE DATABASE `doubles_and_nulls`;
USE `doubles_and_nulls`;
DROP TABLE IF EXISTS qatablefloat;
CREATE TABLE qatablefloat (col float) engine=columnstore;
INSERT INTO qatablefloat VALUES (null);
INSERT INTO qatablefloat VALUES (null);
INSERT INTO qatablefloat VALUES (null);
DELETE FROM qatablefloat WHERE col IS NULL;
SELECT * FROM qatablefloat;
col
DROP TABLE qatablefloat;
DROP DATABASE `doubles_and_nulls`;

View File

@ -0,0 +1,19 @@
# Checks DELETE ... WHERE col IS NULL on a ColumnStore table with a single
# FLOAT column that contains only NULL values.
-- source ../include/have_columnstore.inc
# Setup: start from a fresh database. Warnings are disabled around the
# DROP ... IF EXISTS statements (presumably so that notes about missing
# objects do not end up in the recorded result).
--disable_warnings
DROP DATABASE IF EXISTS `doubles_and_nulls`;
CREATE DATABASE `doubles_and_nulls`;
USE `doubles_and_nulls`;
DROP TABLE IF EXISTS qatablefloat;
--enable_warnings
CREATE TABLE qatablefloat (col float) engine=columnstore;
# Populate the table with three NULL rows and nothing else.
INSERT INTO qatablefloat VALUES (null);
INSERT INTO qatablefloat VALUES (null);
INSERT INTO qatablefloat VALUES (null);
# Delete every NULL row; the SELECT that follows is expected to return no rows.
DELETE FROM qatablefloat WHERE col IS NULL;
SELECT * FROM qatablefloat;
# Cleanup.
DROP TABLE qatablefloat;
DROP DATABASE `doubles_and_nulls`;

View File

@ -0,0 +1,20 @@
DROP DATABASE IF EXISTS `mcol_4499`;
CREATE DATABASE `mcol_4499`;
USE `mcol_4499`;
DROP TABLE IF EXISTS twidevarchar;
DROP TABLE IF EXISTS twidechar;
CREATE TABLE twidevarchar (col VARCHAR(4)) ENGINE=ColumnStore;
INSERT INTO twidevarchar VALUES (NULL),('a');
SELECT col FROM twidevarchar WHERE col LIKE NULL;
col
SELECT col FROM twidevarchar WHERE col NOT LIKE NULL;
col
DROP TABLE twidevarchar;
CREATE TABLE twidechar (col CHAR(5)) ENGINE=ColumnStore;
INSERT INTO twidechar VALUES (NULL),('a');
SELECT col FROM twidechar WHERE col LIKE NULL;
col
SELECT col FROM twidechar WHERE col NOT LIKE NULL;
col
DROP TABLE twidechar;
DROP DATABASE `mcol_4499`;

View File

@ -0,0 +1,27 @@
#
# MCOL-4499 LIKE NULL and NOT LIKE NULL must not return any rows
#
-- source ../include/have_columnstore.inc
# Setup: start from a fresh database. Warnings are disabled around the
# DROP ... IF EXISTS statements (presumably so that notes about missing
# objects do not end up in the recorded result).
--disable_warnings
DROP DATABASE IF EXISTS `mcol_4499`;
CREATE DATABASE `mcol_4499`;
USE `mcol_4499`;
DROP TABLE IF EXISTS twidevarchar;
DROP TABLE IF EXISTS twidechar;
--enable_warnings
# Case 1: VARCHAR(4) column holding one NULL row and one non-NULL row ('a').
CREATE TABLE twidevarchar (col VARCHAR(4)) ENGINE=ColumnStore;
INSERT INTO twidevarchar VALUES (NULL),('a');
# Neither predicate is expected to match any row, including the NULL one.
SELECT col FROM twidevarchar WHERE col LIKE NULL;
SELECT col FROM twidevarchar WHERE col NOT LIKE NULL;
DROP TABLE twidevarchar;
# Case 2: the same checks against a CHAR(5) column.
CREATE TABLE twidechar (col CHAR(5)) ENGINE=ColumnStore;
INSERT INTO twidechar VALUES (NULL),('a');
# Again, both SELECTs are expected to return an empty result set.
SELECT col FROM twidechar WHERE col LIKE NULL;
SELECT col FROM twidechar WHERE col NOT LIKE NULL;
# Cleanup.
DROP TABLE twidechar;
DROP DATABASE `mcol_4499`;

View File

@ -1754,6 +1754,14 @@ void filterColumnData(NewColRequestHeader* in, ColResultHeader* out, uint16_t* r
if (parsedColumnFilter.get() == nullptr && filterCount > 0)
parsedColumnFilter = _parseColumnFilter<T>(in->getFilterStringPtr(), dataType, filterCount, in->BOP);
// If the filter is always false, return an empty result
// TODO how can parsedColumnFilter be nullptr here?
if (parsedColumnFilter.get() != nullptr && parsedColumnFilter->columnFilterMode == ALWAYS_FALSE)
{
out->NVALS = 0;
return;
}
// Cache parsedColumnFilter fields in local vars
auto columnFilterMode = filterCount == 0 ? ALWAYS_TRUE : parsedColumnFilter->columnFilterMode;
FT* filterValues = filterCount == 0 ? nullptr : parsedColumnFilter->getFilterVals<FT>();

View File

@ -28,6 +28,7 @@
#include <stdexcept>
#include <vector>
#include <tr1/unordered_set>
#include "joblisttypes.h"
#define POSIX_REGEX
@ -68,6 +69,7 @@ enum ColumnFilterMode
// COMPARE_EQ)
NONE_OF_VALUES_IN_ARRAY, // NONE of the values in the small set represented by an array (BOP_AND + all
// COMPARE_NE)
ALWAYS_FALSE, // comparison is always false
};
// TBD Test if avalanche makes lookups in the hash maps based on these hashers faster.
@ -482,16 +484,18 @@ T getNullValue(uint8_t type)
case execplan::CalpontSystemCatalog::DOUBLE:
case execplan::CalpontSystemCatalog::UDOUBLE: return joblist::DOUBLENULL;
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::VARCHAR:
case execplan::CalpontSystemCatalog::DATE:
case execplan::CalpontSystemCatalog::DATETIME:
case execplan::CalpontSystemCatalog::TIMESTAMP:
case execplan::CalpontSystemCatalog::TIME:
case execplan::CalpontSystemCatalog::VARBINARY:
case execplan::CalpontSystemCatalog::BLOB:
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::TEXT: return joblist::CHAR8NULL;
// VARCHARs with width >= 8 are stored as dictionaries (used TypeHandlerVarchar::getNullValueForType as a reference)
case execplan::CalpontSystemCatalog::VARCHAR: return joblist::UBIGINTNULL;
case execplan::CalpontSystemCatalog::UBIGINT: return joblist::UBIGINTNULL;
default: return joblist::BIGINTNULL;
@ -578,7 +582,6 @@ boost::shared_ptr<ParsedColumnFilter> _parseColumnFilter(
using UT = typename std::conditional<std::is_unsigned<T>::value || datatypes::is_uint128_t<T>::value, T,
typename datatypes::make_unsigned<T>::type>::type;
const uint32_t WIDTH = sizeof(T); // Sizeof of the column to be filtered
boost::shared_ptr<ParsedColumnFilter> ret; // Place for building the value to return
if (filterCount == 0)
return ret;
@ -612,28 +615,35 @@ boost::shared_ptr<ParsedColumnFilter> _parseColumnFilter(
ret->prestored_rfs[argIndex] = args->rf;
auto colDataType = (execplan::CalpontSystemCatalog::ColDataType)colType;
bool isNullEqCmp = false;
bool isFilterValueNull = false;
if (datatypes::isUnsigned(colDataType))
{
const auto nullValue = getNullValue<UT>(colDataType);
const UT* filterValue = reinterpret_cast<const UT*>(args->val);
isNullEqCmp =
(args->COP == COMPARE_EQ && memcmp(filterValue, &nullValue, sizeof(nullValue)) == 0) ? true : false;
isFilterValueNull = memcmp(filterValue, &nullValue, sizeof(nullValue)) == 0;
ret->storeFilterArg(argIndex, filterValue);
}
else
{
const auto nullValue = getNullValue<T>(colDataType);
const T* filterValue = reinterpret_cast<const T*>(args->val);
isNullEqCmp =
(args->COP == COMPARE_EQ && memcmp(filterValue, &nullValue, sizeof(nullValue)) == 0) ? true : false;
isFilterValueNull = memcmp(filterValue, &nullValue, sizeof(nullValue)) == 0;
ret->storeFilterArg(argIndex, filterValue);
}
// Check if the filter is [NOT] LIKE NULL -- such filters don't match any values
if ((args->COP == COMPARE_LIKE || args->COP == COMPARE_NLIKE) && isFilterValueNull)
{
ret->columnFilterMode = ALWAYS_FALSE;
goto skipConversion;
}
// IS NULL filtering expression is translated into COMPARE_EQ + NULL magic in the filter.
// This if replaces an operation id once to avoid additional branching in the main loop
// of vectorizedFiltering_ in column.cpp.
// It would be cleaner to place it into EM though.
bool isNullEqCmp = (isFilterValueNull && args->COP == COMPARE_EQ);
ret->prestored_cops[argIndex] = (isNullEqCmp) ? COMPARE_NULLEQ : args->COP;
}