mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-04-23 07:05:36 +03:00
This patch improves handling of NULLs in textual fields in ColumnStore. Previously empty strings were considered NULLs and it could be a problem if data scheme allows for empty strings. It was also one of major reasons of behavior difference between ColumnStore and other engines in MariaDB family. Also, this patch fixes some other bugs and incorrect behavior, for example, incorrect comparison for "column <= ''" which evaluates to constant True for all purposes before this patch.
243 lines
5.7 KiB
C++
243 lines
5.7 KiB
C++
#include <type_traits>
|
|
#include "functor_json.h"
|
|
#include "functioncolumn.h"
|
|
#include "rowgroup.h"
|
|
#include "treenode.h"
|
|
using namespace execplan;
|
|
using namespace rowgroup;
|
|
|
|
#include "dataconvert.h"
|
|
|
|
#include "jsonhelpers.h"
|
|
using namespace funcexp::helpers;
|
|
|
|
namespace funcexp
|
|
{
|
|
int Func_json_extract::doExtract(Row& row, FunctionParm& fp, json_value_types* type, string& retJS,
|
|
bool compareWhole = true)
|
|
{
|
|
bool isNull = false;
|
|
const auto js = fp[0]->data()->getStrVal(row, isNull);
|
|
if (isNull)
|
|
return 1;
|
|
const char* rawJS = js.str();
|
|
json_engine_t jsEg, savJSEg;
|
|
json_path_t p;
|
|
const uchar* value;
|
|
bool notFirstVal = false;
|
|
size_t valLen;
|
|
bool mayMulVal;
|
|
int wildcards;
|
|
bool isMatch;
|
|
#ifdef MYSQL_GE_1009
|
|
int arrayCounter[JSON_DEPTH_LIMIT];
|
|
bool hasNegPath = false;
|
|
#endif
|
|
const size_t argSize = fp.size();
|
|
string tmp;
|
|
|
|
initJSPaths(paths, fp, 1, 1);
|
|
|
|
for (size_t i = 1; i < argSize; i++)
|
|
{
|
|
JSONPath& path = paths[i - 1];
|
|
path.p.types_used = JSON_PATH_KEY_NULL;
|
|
if (!path.parsed && parseJSPath(path, row, fp[i]))
|
|
return 1;
|
|
|
|
#ifdef MYSQL_GE_1009
|
|
hasNegPath |= path.p.types_used & JSON_PATH_NEGATIVE_INDEX;
|
|
#endif
|
|
}
|
|
|
|
#ifdef MYSQL_GE_1009
|
|
wildcards = (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD | JSON_PATH_ARRAY_RANGE);
|
|
#else
|
|
wildcards = (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD);
|
|
#endif
|
|
mayMulVal = argSize > 2 || (paths[0].p.types_used & wildcards);
|
|
|
|
*type = mayMulVal ? JSON_VALUE_ARRAY : JSON_VALUE_NULL;
|
|
|
|
if (compareWhole)
|
|
{
|
|
retJS.clear();
|
|
if (mayMulVal)
|
|
retJS.append("[");
|
|
}
|
|
|
|
json_get_path_start(&jsEg, getCharset(fp[0]), (const uchar*)rawJS, (const uchar*)rawJS + js.length(), &p);
|
|
|
|
while (json_get_path_next(&jsEg, &p) == 0)
|
|
{
|
|
#ifdef MYSQL_GE_1009
|
|
if (hasNegPath && jsEg.value_type == JSON_VALUE_ARRAY &&
|
|
json_skip_array_and_count(&jsEg, arrayCounter + (p.last_step - p.steps)))
|
|
return 1;
|
|
#endif
|
|
|
|
#ifdef MYSQL_GE_1009
|
|
isMatch = matchJSPath(paths, &p, jsEg.value_type, arrayCounter, false);
|
|
#else
|
|
isMatch = matchJSPath(paths, &p, jsEg.value_type, nullptr, false);
|
|
#endif
|
|
if (!isMatch)
|
|
continue;
|
|
|
|
value = jsEg.value_begin;
|
|
if (*type == JSON_VALUE_NULL)
|
|
*type = jsEg.value_type;
|
|
|
|
/* we only care about the first found value */
|
|
if (!compareWhole)
|
|
{
|
|
retJS = js.safeString("");
|
|
return 0;
|
|
}
|
|
|
|
if (json_value_scalar(&jsEg))
|
|
valLen = jsEg.value_end - value;
|
|
else
|
|
{
|
|
if (mayMulVal)
|
|
savJSEg = jsEg;
|
|
if (json_skip_level(&jsEg))
|
|
return 1;
|
|
valLen = jsEg.s.c_str - value;
|
|
if (mayMulVal)
|
|
jsEg = savJSEg;
|
|
}
|
|
|
|
if (notFirstVal)
|
|
retJS.append(", ");
|
|
retJS.append((const char*)value, valLen);
|
|
|
|
notFirstVal = true;
|
|
|
|
if (!mayMulVal)
|
|
{
|
|
/* Loop to the end of the JSON just to make sure it's valid. */
|
|
while (json_get_path_next(&jsEg, &p) == 0)
|
|
{
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (unlikely(jsEg.s.error))
|
|
return 1;
|
|
|
|
if (!notFirstVal)
|
|
/* Nothing was found. */
|
|
return 1;
|
|
|
|
if (mayMulVal)
|
|
retJS.append("]");
|
|
|
|
utils::NullString retJS_ns(retJS);
|
|
initJSEngine(jsEg, getCharset(fp[0]), retJS_ns);
|
|
if (doFormat(&jsEg, tmp, Func_json_format::LOOSE))
|
|
return 1;
|
|
|
|
retJS.clear();
|
|
retJS.swap(tmp);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
CalpontSystemCatalog::ColType Func_json_extract::operationType(FunctionParm& fp,
|
|
CalpontSystemCatalog::ColType& resultType)
|
|
{
|
|
return fp[0]->data()->resultType();
|
|
}
|
|
|
|
string Func_json_extract::getStrVal(Row& row, FunctionParm& fp, bool& isNull,
|
|
CalpontSystemCatalog::ColType& type)
|
|
{
|
|
string retJS;
|
|
json_value_types valType;
|
|
if (doExtract(row, fp, &valType, retJS) == 0)
|
|
return retJS;
|
|
|
|
isNull = true;
|
|
return "";
|
|
}
|
|
|
|
int64_t Func_json_extract::getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
|
|
execplan::CalpontSystemCatalog::ColType& type)
|
|
{
|
|
string retJS;
|
|
json_value_types valType;
|
|
int64_t ret = 0;
|
|
if (doExtract(row, fp, &valType, retJS, false) == 0)
|
|
{
|
|
switch (valType)
|
|
{
|
|
case JSON_VALUE_NUMBER:
|
|
case JSON_VALUE_STRING:
|
|
{
|
|
char* end;
|
|
int err;
|
|
ret = getCharset(fp[0])->strntoll(retJS.data(), retJS.size(), 10, &end, &err);
|
|
break;
|
|
}
|
|
case JSON_VALUE_TRUE: ret = 1; break;
|
|
default: break;
|
|
};
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
double Func_json_extract::getDoubleVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
|
|
execplan::CalpontSystemCatalog::ColType& type)
|
|
{
|
|
string retJS;
|
|
json_value_types valType;
|
|
double ret = 0.0;
|
|
if (doExtract(row, fp, &valType, retJS, false) == 0)
|
|
{
|
|
switch (valType)
|
|
{
|
|
case JSON_VALUE_NUMBER:
|
|
case JSON_VALUE_STRING:
|
|
{
|
|
char* end;
|
|
int err;
|
|
ret = getCharset(fp[0])->strntod(retJS.data(), retJS.size(), &end, &err);
|
|
break;
|
|
}
|
|
case JSON_VALUE_TRUE: ret = 1.0; break;
|
|
default: break;
|
|
};
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
execplan::IDB_Decimal Func_json_extract::getDecimalVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
|
|
execplan::CalpontSystemCatalog::ColType& type)
|
|
{
|
|
json_value_types valType;
|
|
string retJS;
|
|
|
|
if (doExtract(row, fp, &valType, retJS, false) == 0)
|
|
{
|
|
switch (valType)
|
|
{
|
|
case JSON_VALUE_STRING:
|
|
case JSON_VALUE_NUMBER: return fp[0]->data()->getDecimalVal(row, isNull);
|
|
case JSON_VALUE_TRUE: return IDB_Decimal(1, 0, 1);
|
|
case JSON_VALUE_OBJECT:
|
|
case JSON_VALUE_ARRAY:
|
|
case JSON_VALUE_FALSE:
|
|
case JSON_VALUE_NULL:
|
|
case JSON_VALUE_UNINITIALIZED: break;
|
|
};
|
|
}
|
|
|
|
return IDB_Decimal(0, 0, 1);
|
|
}
|
|
} // namespace funcexp
|