From 9e37ab82d8df7f4fd75e232fc6488da396cd1ea8 Mon Sep 17 00:00:00 2001 From: Serguey Zefirov Date: Thu, 16 Nov 2023 20:49:29 +0300 Subject: [PATCH] MCOL-5607: JSON function use crashes query execution JSON functions were implemented violating an assumption of their pureness, as they should not have any state. This concrete patch fixes implementation of JSON_VALUE function. --- .../basic/r/MCOL-5607-json-value-fix.result | 34 +++++++++++++++ .../basic/t/MCOL-5607-json-value-fix.test | 32 +++++++++++++++ utils/funcexp/func_json_value.cpp | 41 +++++++++++-------- utils/funcexp/functor.h | 5 +++ utils/funcexp/functor_json.h | 8 +--- 5 files changed, 97 insertions(+), 23 deletions(-) create mode 100644 mysql-test/columnstore/basic/r/MCOL-5607-json-value-fix.result create mode 100644 mysql-test/columnstore/basic/t/MCOL-5607-json-value-fix.test diff --git a/mysql-test/columnstore/basic/r/MCOL-5607-json-value-fix.result b/mysql-test/columnstore/basic/r/MCOL-5607-json-value-fix.result new file mode 100644 index 000000000..2d8b58b81 --- /dev/null +++ b/mysql-test/columnstore/basic/r/MCOL-5607-json-value-fix.result @@ -0,0 +1,34 @@ +DROP DATABASE IF EXISTS MCOL5607; +CREATE DATABASE MCOL5607; +USE MCOL5607; +CREATE TABLE zu (hu TEXT) ENGINE = COLUMNSTORE; +INSERT INTO zu(hu) VALUES ('{}'), (NULL), ('{ "": "huh", "10001" : "10001", "10002" : "10001", "10003" : "10001", "10004" : "10001", "10005" : "10001", "10006" : "10001", "10007" : "10001", "10008" : "10001", "10009" : "10001", "10010" : "10001", "10011" : "10001", "10012" : "10001", "10013" : "10001", "10014" : "10001", "10015" : "10001", "10016" : "10001", "10017" : "10001", "10018" : "10001", "10019" : "10001", "10020" : "10001", "buga" : ""}'); +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +SELECT COUNT(*) FROM zu; +COUNT(*) +3145728 +SELECT COUNT(*) FROM zu wHERE +JSON_VALUE(hu, '$.buga') IS NOT NULL +AND JSON_VALUE(hu, '$.zhuga') IS NULL; +COUNT(*) +1048576 +DROP DATABASE MCOL5607; diff --git a/mysql-test/columnstore/basic/t/MCOL-5607-json-value-fix.test b/mysql-test/columnstore/basic/t/MCOL-5607-json-value-fix.test new file mode 100644 index 000000000..a344e4cee --- /dev/null +++ b/mysql-test/columnstore/basic/t/MCOL-5607-json-value-fix.test @@ -0,0 +1,32 @@ +--disable_warnings +DROP DATABASE IF EXISTS MCOL5607; +--enable_warnings +CREATE DATABASE MCOL5607; +USE MCOL5607; +CREATE TABLE zu (hu TEXT) ENGINE = COLUMNSTORE; +INSERT INTO zu(hu) VALUES ('{}'), (NULL), ('{ "": "huh", "10001" : "10001", "10002" : "10001", "10003" : "10001", "10004" : "10001", "10005" : "10001", "10006" : "10001", "10007" : "10001", "10008" : "10001", "10009" : "10001", "10010" : "10001", "10011" : "10001", "10012" : "10001", "10013" : "10001", "10014" : "10001", "10015" : "10001", "10016" : "10001", "10017" : "10001", "10018" : "10001", "10019" : "10001", "10020" : "10001", "buga" : ""}'); +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +INSERT INTO zu(hu) SELECT hu FROM zu; +SELECT COUNT(*) FROM zu; +SELECT COUNT(*) FROM zu wHERE + JSON_VALUE(hu, '$.buga') IS NOT NULL +AND JSON_VALUE(hu, '$.zhuga') IS NULL; +DROP DATABASE MCOL5607; diff --git a/utils/funcexp/func_json_value.cpp b/utils/funcexp/func_json_value.cpp index 9fa6d0cde..265bfc40f 100644 --- a/utils/funcexp/func_json_value.cpp +++ b/utils/funcexp/func_json_value.cpp @@ -63,29 +63,18 @@ bool JSONPathWrapper::extract(std::string& ret, rowgroup::Row& row, execplan::SP { bool isNullJS = false, isNullPath = false; - const string js = funcParamJS->data()->getStrVal(row, isNullJS).safeString(""); + const utils::NullString& js = funcParamJS->data()->getStrVal(row, isNullJS); const utils::NullString& sjsp = funcParamPath->data()->getStrVal(row, isNullPath); if (isNullJS || isNullPath) return true; int error = 0; - if (!parsed) - { - if (!constant) - { - ConstantColumn* constCol = dynamic_cast(funcParamPath->data()); - constant = (constCol != nullptr); - } + if (json_path_setup(&p, getCharset(funcParamPath), (const uchar*)sjsp.str(), + (const uchar*)sjsp.end())) + return true; - if (isNullPath || json_path_setup(&p, getCharset(funcParamPath), (const uchar*)sjsp.str(), - (const uchar*)sjsp.end())) - return true; - - parsed = constant; - } - - JSONEgWrapper je(js, getCharset(funcParamJS)); + JSONEgWrapper je(getCharset(funcParamJS), reinterpret_cast(js.str()), reinterpret_cast(js.end())); currStep = p.steps; @@ -112,11 +101,29 @@ CalpontSystemCatalog::ColType Func_json_value::operationType(FunctionParm& fp, return fp[0]->data()->resultType(); } +class JSONPathWrapperValue : public JSONPathWrapper +{ + public: + JSONPathWrapperValue() + { + } + virtual ~JSONPathWrapperValue() + { + } + + bool checkAndGetValue(JSONEgWrapper* je, string& res, int* error) override + { + return je->checkAndGetScalar(res, error); + } + +}; + string Func_json_value::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, execplan::CalpontSystemCatalog::ColType& type) { string ret; - isNull = JSONPathWrapper::extract(ret, row, fp[0], fp[1]); + JSONPathWrapperValue pw; + isNull = pw.extract(ret, row, fp[0], fp[1]); return isNull ? "" : ret; } } // namespace funcexp diff --git a/utils/funcexp/functor.h b/utils/funcexp/functor.h index 23c8356b4..f10258cd3 100644 --- a/utils/funcexp/functor.h +++ b/utils/funcexp/functor.h @@ -58,6 +58,11 @@ typedef std::vector FunctionParm; constexpr const int32_t MAX_MICROSECOND_PRECISION = 6; /** @brief Func class + * + * @desc IMPOTRANT: functions are pure transformers, they should + * not have state shared between invocations. This is so because + * functions' objects are, essentially, singletons and the same + * objects will be used in diffeent threads. */ class Func { diff --git a/utils/funcexp/functor_json.h b/utils/funcexp/functor_json.h index 109fba0bc..09afc8d8c 100644 --- a/utils/funcexp/functor_json.h +++ b/utils/funcexp/functor_json.h @@ -56,6 +56,7 @@ class JSONPathWrapper : public JSONPath { } virtual bool checkAndGetValue(JSONEgWrapper* je, std::string& ret, int* error) = 0; + public: bool extract(std::string& ret, rowgroup::Row& row, execplan::SPTP& funcParmJS, execplan::SPTP& funcParmPath); }; @@ -381,7 +382,7 @@ class Func_json_merge_patch : public Func_Str /** @brief Func_json_value class */ -class Func_json_value : public Func_Str, public JSONPathWrapper +class Func_json_value : public Func_Str { public: Func_json_value() : Func_Str("json_value") @@ -391,11 +392,6 @@ class Func_json_value : public Func_Str, public JSONPathWrapper { } - bool checkAndGetValue(JSONEgWrapper* je, string& res, int* error) override - { - return je->checkAndGetScalar(res, error); - } - execplan::CalpontSystemCatalog::ColType operationType( FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& resultType) override;