1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-01 06:46:55 +03:00

MCOL-785 Implement DISTRIBUTED JSON functions

The following functions are created:
Create function JSON_VALID and test cases
Create function JSON_DEPTH and test cases
Create function JSON_LENGTH and test cases
Create function JSON_EQUALS and test cases
Create function JSON_NORMALIZE and test cases
Create function JSON_TYPE and test cases
Create function JSON_OBJECT and test cases
Create function JSON_ARRAY and test cases
Create function JSON_KEYS and test cases
Create function JSON_EXISTS and test cases
Create function JSON_QUOTE/JSON_UNQUOTE and test cases
Create function JSON_COMPACT/DETAILED/LOOSE and test cases
Create function JSON_MERGE and test cases
Create function JSON_MERGE_PATCH and test cases
Create function JSON_VALUE and test cases
Create function JSON_QUERY and test cases
Create function JSON_CONTAINS and test cases
Create function JSON_ARRAY_APPEND and test cases
Create function JSON_ARRAY_INSERT and test cases
Create function JSON_INSERT/REPLACE/SET and test cases
Create function JSON_REMOVE and test cases
Create function JSON_CONTAINS_PATH and test cases
Create function JSON_OVERLAPS and test cases
Create function JSON_EXTRACT and test cases
Create function JSON_SEARCH and test cases

Note:
The output of some functions (e.g. JSON_QUOTE/JSON_UNQUOTE) differs from MariaDB because session variables affect their output.
This depends on MCOL-5212
This commit is contained in:
Ziy1-Tan
2022-06-23 10:49:29 +08:00
parent b5d8e0324b
commit cdd41f05f3
82 changed files with 8645 additions and 2 deletions

View File

@ -57,6 +57,32 @@ set(funcexp_LIB_SRCS
func_insert.cpp
func_instr.cpp
func_isnull.cpp
func_json_array.cpp
func_json_array_append.cpp
func_json_array_insert.cpp
func_json_contains.cpp
func_json_contains_path.cpp
func_json_depth.cpp
func_json_equals.cpp
func_json_exists.cpp
func_json_extract.cpp
func_json_format.cpp
func_json_insert.cpp
func_json_keys.cpp
func_json_length.cpp
func_json_merge.cpp
func_json_merge_patch.cpp
func_json_normalize.cpp
func_json_object.cpp
func_json_overlaps.cpp
func_json_query.cpp
func_json_quote.cpp
func_json_remove.cpp
func_json_search.cpp
func_json_type.cpp
func_json_unquote.cpp
func_json_valid.cpp
func_json_value.cpp
func_last_day.cpp
func_lcase.cpp
func_least.cpp
@ -117,6 +143,7 @@ set(funcexp_LIB_SRCS
func_weekday.cpp
func_year.cpp
func_yearweek.cpp
jsonhelpers.cpp
sql_crypt.cpp)
add_library(funcexp SHARED ${funcexp_LIB_SRCS})

View File

@ -0,0 +1,52 @@
#include <string>
using namespace std;
#include "functor_json.h"
#include "functioncolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// Picks the operation type: the result type of the first argument when at
// least one argument is present, otherwise the caller-supplied resultType.
CalpontSystemCatalog::ColType Func_json_array::operationType(FunctionParm& fp,
                                                             CalpontSystemCatalog::ColType& resultType)
{
  if (fp.size() == 0)
    return resultType;

  return fp[0]->data()->resultType();
}
// Builds a JSON array literal from the arguments for the current row.
// Returns "[]" when there are no arguments. Each argument is serialised with
// appendJSValue() and elements are separated by ", ". If any element fails to
// serialise, isNull is set and an empty string is returned.
string Func_json_array::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                  execplan::CalpontSystemCatalog::ColType& type)
{
  const size_t argCount = fp.size();
  if (argCount == 0)
    return "[]";

  const CHARSET_INFO* retCS = type.getCharset();
  string result("[");

  for (size_t idx = 0; idx < argCount; ++idx)
  {
    // Separator goes in front of every element except the first one.
    if (idx != 0)
      result.append(", ");

    if (appendJSValue(result, retCS, row, fp[idx]))
    {
      isNull = true;
      return "";
    }
  }

  result.append("]");
  return result;
}
} // namespace funcexp

View File

@ -0,0 +1,117 @@
#include "functor_json.h"
#include "functioncolumn.h"
#include "constantcolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// The operation type is the result type of the first argument (the JSON document).
CalpontSystemCatalog::ColType Func_json_array_append::operationType(FunctionParm& fp,
                                                                    CalpontSystemCatalog::ColType& resultType)
{
  const auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
// Evaluates the function for one row.
//
// fp[0] is the JSON document; the remaining arguments are consumed in
// (path, value) pairs. For every pair the path is located in the current
// document and the value is appended to the array found there; a non-array
// target is wrapped, together with the new value, in a new array. The output
// of one pair is the input of the next. The final text is re-emitted through
// doFormat() in LOOSE mode.
//
// Sets isNull and returns "" when the document is NULL, a path cannot be
// parsed or located, the JSON cannot be read, or a value cannot be serialised.
string Func_json_array_append::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type)
{
const string_view js = fp[0]->data()->getStrVal(row, isNull);
if (isNull)
return "";
const CHARSET_INFO* cs = getCharset(fp[0]);
json_engine_t jsEg;
const uchar* arrEnd;
size_t strRestLen;
string retJS;
// `padding` is not declared in this block (presumably a class constant — TODO confirm).
retJS.reserve(js.size() + padding);
// `paths` is not declared in this block (presumably a member holding one
// JSONPath per path argument — TODO confirm). Path arguments start at index 1
// and occur every 2 arguments.
initJSPaths(paths, fp, 1, 2);
// Working copy of the document; replaced by the rewritten text after each pair.
string tmpJS{js};
// i walks the path arguments (fp[i] = path, fp[i + 1] = value), j the matching slot in paths.
for (size_t i = 1, j = 0; i < fp.size(); i += 2, j++)
{
const char* rawJS = tmpJS.data();
const size_t jsLen = tmpJS.size();
JSONPath& path = paths[j];
// Only parse the path when it is not already marked as parsed.
if (!path.parsed && parseJSPath(path, row, fp[i], false))
goto error;
initJSEngine(jsEg, cs, tmpJS);
if (locateJSPath(jsEg, path))
goto error;
if (json_read_value(&jsEg))
goto error;
if (jsEg.value_type == JSON_VALUE_ARRAY)
{
int itemSize;
// Skips over the array while counting its items into itemSize.
if (json_skip_level_and_count(&jsEg, &itemSize))
goto error;
// Insertion point: the engine position stepped back by sav_c_len
// (presumably just before the closing ']' — TODO confirm).
arrEnd = jsEg.s.c_str - jsEg.sav_c_len;
strRestLen = jsLen - (arrEnd - (const uchar*)rawJS);
// Everything up to the insertion point is copied unchanged.
retJS.append(rawJS, arrEnd - (const uchar*)rawJS);
// A separator is only needed when the array already holds items.
if (itemSize)
retJS.append(", ");
if (appendJSValue(retJS, cs, row, fp[i + 1]))
goto error;
// Copy the remainder of the document starting at the insertion point.
retJS.append((const char*)arrEnd, strRestLen);
}
else
{
const uchar *start, *end;
/* Wrap as an array. */
// Copy everything that precedes the located value unchanged.
retJS.append(rawJS, (const char*)jsEg.value_begin - rawJS);
start = jsEg.value_begin;
if (jsEg.value_type == JSON_VALUE_OBJECT)
{
// An object's end is found by skipping the whole level.
if (json_skip_level(&jsEg))
goto error;
end = jsEg.s.c_str;
}
else
end = jsEg.value_end;
// Emit "[<existing value>, <new value>]".
retJS.append("[");
retJS.append((const char*)start, end - start);
retJS.append(", ");
if (appendJSValue(retJS, cs, row, fp[i + 1]))
goto error;
retJS.append("]");
// Copy whatever follows the engine's current position to the end of the document.
retJS.append((const char*)jsEg.s.c_str, rawJS + jsLen - (const char*)jsEg.s.c_str);
}
// tmpJS keeps the rewritten JSON text as the input for the next (path, value) pair.
tmpJS.swap(retJS);
retJS.clear();
}
// Re-parse the final text and emit it in LOOSE format.
initJSEngine(jsEg, cs, tmpJS);
retJS.clear();
if (doFormat(&jsEg, retJS, Func_json_format::LOOSE))
goto error;
isNull = false;
return retJS;
error:
isNull = true;
return "";
}
} // namespace funcexp

View File

@ -0,0 +1,142 @@
#include "functor_json.h"
#include "functioncolumn.h"
#include "constantcolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// The operation type is the result type of the first argument (the JSON document).
CalpontSystemCatalog::ColType Func_json_array_insert::operationType(FunctionParm& fp,
                                                                    CalpontSystemCatalog::ColType& resultType)
{
  const auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
// Evaluates the function for one row.
//
// fp[0] is the JSON document; the remaining arguments are consumed in
// (path, value) pairs. Each path must end in an array step: the parent array
// is located and the value is inserted in front of the item at that index, or
// appended when the array has fewer items. Pairs whose parent cannot be
// located (without a JSON error) or whose target is not an array are skipped.
// The output of one pair is the input of the next, and the final text is
// re-emitted through doFormat() in LOOSE mode.
//
// Sets isNull and returns "" on a NULL document, an invalid path, a JSON read
// error or a value that cannot be serialised.
string Func_json_array_insert::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type)
{
const string_view js = fp[0]->data()->getStrVal(row, isNull);
if (isNull)
return "";
const CHARSET_INFO* cs = getCharset(fp[0]);
json_engine_t jsEg;
string retJS;
retJS.reserve(js.size() + 8);
// `paths` is not declared in this block (presumably a member holding one
// JSONPath per path argument — TODO confirm). Path arguments start at index 1
// and occur every 2 arguments.
initJSPaths(paths, fp, 1, 2);
// Working copy of the document; replaced by the rewritten text after each pair.
string tmpJS{js};
// i walks the path arguments (fp[i] = path, fp[i + 1] = value), j the matching slot in paths.
for (size_t i = 1, j = 0; i < fp.size(); i += 2, j++)
{
const char* rawJS = tmpJS.data();
const size_t jsLen = tmpJS.size();
JSONPath& path = paths[j];
if (!path.parsed)
{
// The path must parse, have at least one step before the last one, and end in an array step.
if (parseJSPath(path, row, fp[i]) || path.p.last_step - 1 < path.p.steps ||
path.p.last_step->type != JSON_PATH_ARRAY)
{
// Report a dedicated error code when parsing itself did not set one.
if (path.p.s.error == 0)
path.p.s.error = SHOULD_END_WITH_ARRAY;
goto error;
}
// Drop the trailing array step so the path locates the parent; the dropped
// step stays readable as last_step[1] below.
path.p.last_step--;
}
initJSEngine(jsEg, cs, tmpJS);
path.currStep = path.p.steps;
int jsErr = 0;
if (locateJSPath(jsEg, path, &jsErr))
{
if (jsErr)
goto error;
// Can't find the array to insert.
continue;
}
if (json_read_value(&jsEg))
goto error;
if (jsEg.value_type != JSON_VALUE_ARRAY)
{
/* Must be an array. */
continue;
}
// itemPos stays null when the array has fewer items than the requested index.
const char* itemPos = 0;
IntType itemSize = 0;
// Walk the array items until the requested index or the end of the array is reached.
while (json_scan_next(&jsEg) == 0 && jsEg.state != JST_ARRAY_END)
{
DBUG_ASSERT(jsEg.state == JST_VALUE);
if (itemSize == path.p.last_step[1].n_item)
{
// Remember where the item at the requested index starts.
itemPos = (const char*)jsEg.s.c_str;
break;
}
itemSize++;
// Step over the current item; non-scalar items need their whole level skipped.
if (json_read_value(&jsEg) || (!json_value_scalar(&jsEg) && json_skip_level(&jsEg)))
goto error;
}
if (unlikely(jsEg.s.error || *jsEg.killed_ptr))
goto error;
if (itemPos)
{
// Insert "<value>," in front of the item found at the requested index.
retJS.append(rawJS, itemPos - rawJS);
if (itemSize > 0)
retJS.append(" ");
if (appendJSValue(retJS, cs, row, fp[i + 1]))
goto error;
retJS.append(",");
if (itemSize == 0)
retJS.append(" ");
retJS.append(itemPos, rawJS + jsLen - itemPos);
}
else
{
/* Insert position wasn't found - append to the array. */
DBUG_ASSERT(jsEg.state == JST_ARRAY_END);
// Insertion point: the engine position stepped back by sav_c_len
// (presumably just before the closing ']' — TODO confirm).
itemPos = (const char*)(jsEg.s.c_str - jsEg.sav_c_len);
retJS.append(rawJS, itemPos - rawJS);
// A separator is only needed when the array already holds items.
if (itemSize > 0)
retJS.append(", ");
if (appendJSValue(retJS, cs, row, fp[i + 1]))
goto error;
retJS.append(itemPos, rawJS + jsLen - itemPos);
}
// tmpJS keeps the rewritten JSON text as the input for the next (path, value) pair.
tmpJS.swap(retJS);
retJS.clear();
}
// Re-parse the final text and emit it in LOOSE format.
initJSEngine(jsEg, cs, tmpJS);
retJS.clear();
if (doFormat(&jsEg, retJS, Func_json_format::LOOSE))
goto error;
isNull = false;
return retJS;
error:
isNull = true;
return "";
}
} // namespace funcexp

View File

@ -0,0 +1,213 @@
#include "functor_json.h"
#include "functioncolumn.h"
#include "constantcolumn.h"
#include "rowgroup.h"
using namespace execplan;
using namespace rowgroup;
#include "dataconvert.h"
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace
{
// Recursively tests whether the value currently read in valEg is contained in
// the value currently read in jsEg. Dispatches on jsEg->value_type:
//  - object: valEg must also be an object, and every key of valEg must be
//    found in jsEg (findKeyInObject) with a value that contains valEg's value;
//  - array, valEg not an array: true as soon as one element of jsEg contains valEg;
//  - array, valEg an array: every element of valEg must be contained in jsEg;
//  - string: valEg must be a string with byte-identical raw content;
//  - number: valEg must be a number; both are parsed with strntod and compared
//    with an absolute tolerance of 1e-12;
//  - any other type: the two value types must be equal.
// Both engines are advanced as a side effect. Parse errors make the function
// return false; they remain visible in the engines' s.error fields.
static bool checkContains(json_engine_t* jsEg, json_engine_t* valEg)
{
// Snapshot of one engine, used to rewind it before each new comparison.
json_engine_t localJsEg;
// False until the first comparison has run; the first pass needs no rewind.
bool isEgSet;
switch (jsEg->value_type)
{
case JSON_VALUE_OBJECT:
{
json_string_t keyName;
if (valEg->value_type != JSON_VALUE_OBJECT)
return false;
// Remember the start of the jsEg object so every key lookup starts from it.
localJsEg = *jsEg;
isEgSet = false;
json_string_set_cs(&keyName, valEg->s.cs);
// Iterate over the keys of the candidate object.
while (json_scan_next(valEg) == 0 && valEg->state != JST_OBJ_END)
{
const uchar *keyStart, *keyEnd;
DBUG_ASSERT(valEg->state == JST_KEY);
keyStart = valEg->s.c_str;
// Consume the key name; keyEnd ends up at the position before the last read.
do
{
keyEnd = valEg->s.c_str;
} while (json_read_keyname_chr(valEg) == 0);
if (unlikely(valEg->s.error) || json_read_value(valEg))
return false;
// Rewind jsEg to the start of its object for every key after the first.
if (isEgSet)
*jsEg = localJsEg;
else
isEgSet = true;
json_string_set_str(&keyName, keyStart, keyEnd);
// The key must exist in jsEg and its value must contain the candidate's value.
if (!findKeyInObject(jsEg, &keyName) || json_read_value(jsEg) || !checkContains(jsEg, valEg))
return false;
}
return valEg->state == JST_OBJ_END && !json_skip_level(jsEg);
}
case JSON_VALUE_ARRAY:
if (valEg->value_type != JSON_VALUE_ARRAY)
{
// Non-array candidate: it is enough that one array element contains it.
localJsEg = *valEg;
isEgSet = false;
while (json_scan_next(jsEg) == 0 && jsEg->state != JST_ARRAY_END)
{
int currLevel, isScaler;
DBUG_ASSERT(jsEg->state == JST_VALUE);
if (json_read_value(jsEg))
return false;
// Remember the nesting level of non-scalar elements so they can be skipped after a failed match.
if (!(isScaler = json_value_scalar(jsEg)))
currLevel = json_get_level(jsEg);
// Rewind the candidate for every element after the first.
if (isEgSet)
*valEg = localJsEg;
else
isEgSet = true;
if (checkContains(jsEg, valEg))
{
if (json_skip_level(jsEg))
return false;
return true;
}
if (unlikely(valEg->s.error) || unlikely(jsEg->s.error) ||
(!isScaler && json_skip_to_level(jsEg, currLevel)))
return false;
}
return false;
}
/* else */
// Array candidate: every one of its elements must be contained in the jsEg array.
localJsEg = *jsEg;
isEgSet = false;
while (json_scan_next(valEg) == 0 && valEg->state != JST_ARRAY_END)
{
DBUG_ASSERT(valEg->state == JST_VALUE);
if (json_read_value(valEg))
return false;
// Rewind jsEg to the start of its array for every candidate element after the first.
if (isEgSet)
*jsEg = localJsEg;
else
isEgSet = true;
if (!checkContains(jsEg, valEg))
return false;
}
return valEg->state == JST_ARRAY_END;
case JSON_VALUE_STRING:
if (valEg->value_type != JSON_VALUE_STRING)
return false;
/*
TODO: make proper json-json comparison here that takes excipient
into account.
*/
// Raw byte comparison of the two string values.
return valEg->value_len == jsEg->value_len && memcmp(valEg->value, jsEg->value, valEg->value_len) == 0;
case JSON_VALUE_NUMBER:
if (valEg->value_type == JSON_VALUE_NUMBER)
{
double jsEgVal, valEgVal;
char* end;
int err;
// Conversion errors reported through `err` are not inspected here.
jsEgVal = jsEg->s.cs->strntod((char*)jsEg->value, jsEg->value_len, &end, &err);
;
valEgVal = valEg->s.cs->strntod((char*)valEg->value, valEg->value_len, &end, &err);
;
// Equality within a fixed absolute tolerance.
return (fabs(jsEgVal - valEgVal) < 1e-12);
}
else
return false;
default: break;
}
/*
We have these not mentioned in the 'switch' above:
case JSON_VALUE_TRUE:
case JSON_VALUE_FALSE:
case JSON_VALUE_NULL:
*/
return valEg->value_type == jsEg->value_type;
}
} // namespace
namespace funcexp
{
// The operation type is the result type of the first argument (the JSON document).
CalpontSystemCatalog::ColType Func_json_contains::operationType(FunctionParm& fp,
                                                                CalpontSystemCatalog::ColType& resultType)
{
  const auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
/**
 * getBoolVal API definition
 *
 * Returns whether the candidate JSON value (fp[1]) is contained in the JSON
 * document (fp[0]), optionally restricted to the sub-document addressed by
 * the path in fp[2]. The result is NULL (isNull set, false returned) when
 * either text argument is NULL, the path cannot be parsed or located, or
 * either JSON text fails to parse.
 */
bool Func_json_contains::getBoolVal(Row& row, FunctionParm& fp, bool& isNull,
                                    CalpontSystemCatalog::ColType& type)
{
  bool docIsNull = false;
  bool candidateIsNull = false;
  const string_view js = fp[0]->data()->getStrVal(row, docIsNull);
  const string_view val = fp[1]->data()->getStrVal(row, candidateIsNull);
  if (docIsNull || candidateIsNull)
  {
    isNull = true;
    return false;
  }

  // Cache the candidate text; once the argument is known to be a constant
  // column the cached copy is reused for subsequent rows.
  if (!arg2Parsed)
  {
    if (!arg2Const)
      arg2Const = dynamic_cast<ConstantColumn*>(fp[1]->data()) != nullptr;

    arg2Val = val;
    arg2Parsed = arg2Const;
  }

  json_engine_t docEg;
  initJSEngine(docEg, getCharset(fp[0]), js);

  if (fp.size() > 2)
  {
    // Parse the path when needed, then position the document engine on it.
    const bool pathFailed =
        (!path.parsed && parseJSPath(path, row, fp[2], false)) || locateJSPath(docEg, path);
    if (pathFailed)
    {
      isNull = true;
      return false;
    }
  }

  json_engine_t candidateEg;
  initJSEngine(candidateEg, getCharset(fp[1]), arg2Val);

  if (json_read_value(&docEg) || json_read_value(&candidateEg))
  {
    isNull = true;
    return false;
  }

  const bool contained = checkContains(&docEg, &candidateEg);

  // A parse error in either engine invalidates whatever checkContains returned.
  if (unlikely(docEg.s.error || candidateEg.s.error))
  {
    isNull = true;
    return false;
  }

  return contained;
}
} // namespace funcexp

View File

@ -0,0 +1,141 @@
#include <string_view>
#include <algorithm>
using namespace std;
#include "functor_json.h"
#include "functioncolumn.h"
#include "constantcolumn.h"
#include "rowgroup.h"
using namespace execplan;
using namespace rowgroup;
#include "dataconvert.h"
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// The operation type is the result type of the first argument (the JSON document).
CalpontSystemCatalog::ColType Func_json_contains_path::operationType(
    FunctionParm& fp, CalpontSystemCatalog::ColType& resultType)
{
  const auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
/**
 * getBoolVal API definition
 *
 * fp[0] is the JSON document, fp[1] the mode ("one" or "all", compared
 * case-insensitively) and fp[2..] the paths to look for. In "one" mode the
 * result is true as soon as any path matches; in "all" mode every path must
 * match at least once.
 *
 * The result is NULL (isNull set, false returned) when the document or the
 * mode is NULL, the mode is neither "one" nor "all", a path cannot be parsed,
 * or the document fails to parse.
 */
bool Func_json_contains_path::getBoolVal(Row& row, FunctionParm& fp, bool& isNull,
                                         CalpontSystemCatalog::ColType& type)
{
  const string_view js = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return false;

#ifdef MYSQL_GE_1009
  int arrayCounters[JSON_DEPTH_LIMIT];
  bool hasNegPath = false;
#endif
  const int argSize = fp.size() - 2;

  // The mode is re-read on every row unless the argument is a constant column.
  if (!isModeParsed)
  {
    if (!isModeConst)
      isModeConst = (dynamic_cast<ConstantColumn*>(fp[1]->data()) != nullptr);

    string mode = fp[1]->data()->getStrVal(row, isNull);
    if (isNull)
      return false;

    transform(mode.begin(), mode.end(), mode.begin(), ::tolower);
    if (mode != "one" && mode != "all")
    {
      isNull = true;
      return false;
    }

    isModeOne = (mode == "one");
    isModeParsed = isModeConst;
  }

  initJSPaths(paths, fp, 2, 1);
  if (paths.size() == 0)
    hasFound.assign(argSize, false);

  for (size_t i = 2; i < fp.size(); i++)
  {
    JSONPath& path = paths[i - 2];
    if (!path.parsed && parseJSPath(path, row, fp[i]))
    {
      isNull = true;
      return false;
    }
#ifdef MYSQL_GE_1009
    // hasNegPath is a per-call local, so it has to be rebuilt from every path
    // on every call - including paths that were parsed on an earlier row and
    // are only being reused here. Updating it solely after a fresh parse would
    // lose the flag from the second row onwards.
    hasNegPath |= path.p.types_used & JSON_PATH_NEGATIVE_INDEX;
#endif
  }

  json_engine_t jsEg;
  json_path_t p;
  json_get_path_start(&jsEg, getCharset(fp[0]), (const uchar*)js.data(), (const uchar*)js.data() + js.size(),
                      &p);

  bool result = false;
  int needFound = 0;
  if (!isModeOne)
  {
    // "all" mode: track which paths were already seen and how many are still missing.
    hasFound.assign(argSize, false);
    needFound = argSize;
  }

  // Walk every path present in the document and compare it with the requested ones.
  while (json_get_path_next(&jsEg, &p) == 0)
  {
#ifdef MYSQL_GE_1009
    if (hasNegPath && jsEg.value_type == JSON_VALUE_ARRAY &&
        json_skip_array_and_count(&jsEg, arrayCounters + (p.last_step - p.steps)))
    {
      result = true;
      break;
    }
#endif

    for (int restSize = argSize, curr = 0; restSize > 0; restSize--, curr++)
    {
      JSONPath& path = paths[curr];
#ifdef MYSQL_GE_1009
      int cmp = cmpJSPath(&path.p, &p, jsEg.value_type, arrayCounters);
#else
      int cmp = cmpJSPath(&path.p, &p, jsEg.value_type);
#endif
      if (cmp >= 0)
      {
        if (isModeOne)
        {
          result = true;
          break;
        }
        /* mode_all */
        if (hasFound[restSize - 1])
          continue; /* already found */
        if (--needFound == 0)
        {
          result = true;
          break;
        }
        hasFound[restSize - 1] = true;
      }
    }
  }

  if (likely(jsEg.s.error == 0))
    return result;

  isNull = true;
  return false;
}
} // namespace funcexp

View File

@ -0,0 +1,67 @@
#include "functor_json.h"
#include "functioncolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "dataconvert.h"
using namespace dataconvert;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// The operation type is the result type of the first argument (the JSON document).
CalpontSystemCatalog::ColType Func_json_depth::operationType(FunctionParm& fp,
                                                             CalpontSystemCatalog::ColType& resultType)
{
  const auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
// Computes the maximum nesting depth of the JSON document in fp[0] by walking
// the scanner states of the whole text. Returns 0 with isNull set when the
// document is NULL or the scan ends with an error.
int64_t Func_json_depth::getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                   execplan::CalpontSystemCatalog::ColType& op_ct)
{
  const string_view js = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return 0;

  int maxDepth = 0;
  int level = 0;
  // True while the next value/key seen opens a new level (initially, and right
  // after an object/array start).
  bool enterPending = true;

  json_engine_t jsEg;
  initJSEngine(jsEg, getCharset(fp[0]), js);

  for (;;)
  {
    if (jsEg.state == JST_VALUE || jsEg.state == JST_KEY)
    {
      if (enterPending)
      {
        ++level;
        enterPending = false;
        if (level > maxDepth)
          maxDepth = level;
      }
    }
    else if (jsEg.state == JST_OBJ_START || jsEg.state == JST_ARRAY_START)
    {
      enterPending = true;
    }
    else if (jsEg.state == JST_OBJ_END || jsEg.state == JST_ARRAY_END)
    {
      // An empty container never entered a level, so there is nothing to leave.
      if (!enterPending)
        --level;
      enterPending = false;
    }

    if (json_scan_next(&jsEg) != 0)
      break;
  }

  if (likely(!jsEg.s.error))
    return maxDepth;

  isNull = true;
  return 0;
}
} // namespace funcexp

View File

@ -0,0 +1,71 @@
#include <string_view>
#include <memory>
using namespace std;
#include "functor_json.h"
#include "functioncolumn.h"
#include "rowgroup.h"
using namespace execplan;
using namespace rowgroup;
#include "dataconvert.h"
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// The operation type is the result type of the first argument (the first JSON document).
CalpontSystemCatalog::ColType Func_json_equals::operationType(FunctionParm& fp,
                                                              CalpontSystemCatalog::ColType& resultType)
{
  const auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
/**
 * getBoolVal API definition
 *
 * Normalises both JSON documents (fp[0] and fp[1]) with json_normalize() and
 * reports whether the normalised texts are identical.
 *
 * The result is NULL (isNull set, false returned) when either argument is
 * NULL, a work buffer cannot be initialised, or either document cannot be
 * normalised.
 */
bool Func_json_equals::getBoolVal(Row& row, FunctionParm& fp, bool& isNull,
                                  CalpontSystemCatalog::ColType& type)
{
  // Holds a DYNAMIC_STRING by value and releases its buffer on scope exit.
  // dynstr_free() only frees the buffer the struct points to, never the struct
  // itself, so keeping the struct on the stack (rather than allocating it with
  // `new` and using dynstr_free as the deleter) avoids leaking it on every call.
  // The struct is zero-initialised, which is the same state the previous
  // implementation handed to dynstr_free() when initialisation failed.
  struct ScopedDynStr
  {
    DYNAMIC_STRING buf{};
    ScopedDynStr() = default;
    ScopedDynStr(const ScopedDynStr&) = delete;
    ScopedDynStr& operator=(const ScopedDynStr&) = delete;
    ~ScopedDynStr()
    {
      dynstr_free(&buf);
    }
  };

  const string_view js1 = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return false;

  const string_view js2 = fp[1]->data()->getStrVal(row, isNull);
  if (isNull)
    return false;

  ScopedDynStr norm1;
  ScopedDynStr norm2;

  // Any failure below yields NULL; the value returned alongside isNull is
  // false on every such path.
  if (init_dynamic_string(&norm1.buf, NULL, 0, 0) || init_dynamic_string(&norm2.buf, NULL, 0, 0) ||
      json_normalize(&norm1.buf, js1.data(), js1.size(), getCharset(fp[0])) ||
      json_normalize(&norm2.buf, js2.data(), js2.size(), getCharset(fp[1])))
  {
    isNull = true;
    return false;
  }

  return strcmp(norm1.buf.str, norm2.buf.str) == 0;
}
} // namespace funcexp

View File

@ -0,0 +1,51 @@
#include "functor_json.h"
#include "functioncolumn.h"
#include "constantcolumn.h"
#include "rowgroup.h"
using namespace execplan;
using namespace rowgroup;
#include "dataconvert.h"
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// The operation type is the result type of the first argument (the JSON document).
CalpontSystemCatalog::ColType Func_json_exists::operationType(FunctionParm& fp,
                                                              CalpontSystemCatalog::ColType& resultType)
{
  const auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
/**
 * getBoolVal API definition
 *
 * Returns true when the path in fp[1] can be located in the JSON document in
 * fp[0]. The result is NULL (isNull set, false returned) when the document is
 * NULL, the path cannot be parsed, or locating the path reports a JSON error.
 */
bool Func_json_exists::getBoolVal(Row& row, FunctionParm& fp, bool& isNull,
                                  CalpontSystemCatalog::ColType& type)
{
  const string_view js = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return false;

  json_engine_t jsEg;
  initJSEngine(jsEg, getCharset(fp[0]), js);

  // Only parse the path when it is not already marked as parsed.
  if (!path.parsed && parseJSPath(path, row, fp[1]))
  {
    isNull = true;
    return false;
  }

  int locateErr = 0;
  if (!locateJSPath(jsEg, path, &locateErr))
    return true;

  // Not located: a reported error means NULL, otherwise a plain "does not exist".
  if (locateErr)
    isNull = true;
  return false;
}
} // namespace funcexp

View File

@ -0,0 +1,243 @@
#include <type_traits>
#include "functor_json.h"
#include "functioncolumn.h"
#include "rowgroup.h"
#include "treenode.h"
using namespace execplan;
using namespace rowgroup;
#include "dataconvert.h"
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// Shared worker for the typed accessors of this function.
//
// fp[0] is the JSON document and fp[1..] are the paths to extract.
//
// @param type          receives the type of the extracted value: the array type
//                      when several values may match (more than one path, or a
//                      wildcard in the first path), otherwise the type of the
//                      first match.
// @param retJS         receives the result text (see compareWhole).
// @param compareWhole  true: retJS receives all matches, wrapped in [...] when
//                      several values may match, re-emitted through doFormat()
//                      in LOOSE mode. false: only the first match matters; for a
//                      number or string match retJS receives that value's raw
//                      text, for any other type it receives the whole document.
// @return 0 on success; 1 when the document is NULL, a path is invalid, the
//         JSON is malformed or nothing matched.
int Func_json_extract::doExtract(Row& row, FunctionParm& fp, json_value_types* type, string& retJS,
                                 bool compareWhole = true)
{
  bool isNull = false;
  const string_view js = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return 1;

  const char* rawJS = js.data();
  json_engine_t jsEg, savJSEg;
  json_path_t p;
  const uchar* value;
  bool notFirstVal = false;
  size_t valLen;
  bool mayMulVal;
  int wildcards;
  bool isMatch;
#ifdef MYSQL_GE_1009
  int arrayCounter[JSON_DEPTH_LIMIT];
  bool hasNegPath = false;
#endif
  const size_t argSize = fp.size();
  string tmp;

  initJSPaths(paths, fp, 1, 1);

  for (size_t i = 1; i < argSize; i++)
  {
    JSONPath& path = paths[i - 1];
    if (!path.parsed)
    {
      // Reset the flags only for a path that is about to be parsed again. A
      // path kept from an earlier row is not re-parsed, so clearing its
      // types_used would drop its wildcard / negative-index flags and change
      // the result shape from the second row onwards.
      path.p.types_used = JSON_PATH_KEY_NULL;
      if (parseJSPath(path, row, fp[i]))
        goto error;
    }
#ifdef MYSQL_GE_1009
    hasNegPath |= path.p.types_used & JSON_PATH_NEGATIVE_INDEX;
#endif
  }

#ifdef MYSQL_GE_1009
  wildcards = (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD | JSON_PATH_ARRAY_RANGE);
#else
  wildcards = (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD);
#endif
  // Several values are possible with more than one path or a wildcard path.
  mayMulVal = argSize > 2 || (paths[0].p.types_used & wildcards);

  *type = mayMulVal ? JSON_VALUE_ARRAY : JSON_VALUE_NULL;

  if (compareWhole)
  {
    retJS.clear();
    if (mayMulVal)
      retJS.append("[");
  }

  json_get_path_start(&jsEg, getCharset(fp[0]), (const uchar*)rawJS, (const uchar*)rawJS + js.size(), &p);

  // Walk every path present in the document and keep the ones that match.
  while (json_get_path_next(&jsEg, &p) == 0)
  {
#ifdef MYSQL_GE_1009
    if (hasNegPath && jsEg.value_type == JSON_VALUE_ARRAY &&
        json_skip_array_and_count(&jsEg, arrayCounter + (p.last_step - p.steps)))
      goto error;
#endif

#ifdef MYSQL_GE_1009
    isMatch = matchJSPath(paths, &p, jsEg.value_type, arrayCounter, false);
#else
    isMatch = matchJSPath(paths, &p, jsEg.value_type, nullptr, false);
#endif
    if (!isMatch)
      continue;

    value = jsEg.value_begin;
    if (*type == JSON_VALUE_NULL)
      *type = jsEg.value_type;

    /* we only care about the first found value */
    if (!compareWhole)
    {
      // Numeric callers convert retJS, so hand them the matched scalar rather
      // than the whole document. Only number and string matches are converted
      // by the callers; other types keep the previous behaviour.
      if (jsEg.value_type == JSON_VALUE_NUMBER || jsEg.value_type == JSON_VALUE_STRING)
        retJS.assign((const char*)jsEg.value, jsEg.value_len);
      else
        retJS = js;
      return 0;
    }

    if (json_value_scalar(&jsEg))
      valLen = jsEg.value_end - value;
    else
    {
      // Measure the container by skipping it; when more matches may follow
      // (possibly inside this container) restore the engine afterwards.
      if (mayMulVal)
        savJSEg = jsEg;
      if (json_skip_level(&jsEg))
        goto error;
      valLen = jsEg.s.c_str - value;
      if (mayMulVal)
        jsEg = savJSEg;
    }

    if (notFirstVal)
      retJS.append(", ");
    retJS.append((const char*)value, valLen);

    notFirstVal = true;

    if (!mayMulVal)
    {
      /* Loop to the end of the JSON just to make sure it's valid. */
      while (json_get_path_next(&jsEg, &p) == 0)
      {
      }
      break;
    }
  }

  if (unlikely(jsEg.s.error))
    goto error;

  if (!notFirstVal)
    /* Nothing was found. */
    goto error;

  if (mayMulVal)
    retJS.append("]");

  // Re-parse the collected text and emit it in LOOSE format.
  initJSEngine(jsEg, getCharset(fp[0]), retJS);
  if (doFormat(&jsEg, tmp, Func_json_format::LOOSE))
    goto error;

  retJS.clear();
  retJS.swap(tmp);

  return 0;

error:
  return 1;
}
// The operation type is the result type of the first argument (the JSON document).
CalpontSystemCatalog::ColType Func_json_extract::operationType(FunctionParm& fp,
                                                               CalpontSystemCatalog::ColType& resultType)
{
  const auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
// Returns the extracted JSON text produced by doExtract(). When extraction
// fails, isNull is set and an empty string is returned.
string Func_json_extract::getStrVal(Row& row, FunctionParm& fp, bool& isNull,
                                    CalpontSystemCatalog::ColType& type)
{
  string extracted;
  json_value_types foundType;

  if (doExtract(row, fp, &foundType, extracted) != 0)
  {
    isNull = true;
    return "";
  }

  return extracted;
}
// Integer view of the extraction result.
//
// Number and string results are converted with the charset's strntoll()
// (base 10) applied to the text doExtract() returned; a JSON `true` yields 1;
// every other type yields 0. When extraction fails the result is NULL: isNull
// is set (as getStrVal does) and 0 is returned.
int64_t Func_json_extract::getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                     execplan::CalpontSystemCatalog::ColType& type)
{
  string retJS;
  json_value_types valType;

  if (doExtract(row, fp, &valType, retJS, false) != 0)
  {
    // Report the failure instead of silently returning a non-NULL 0.
    isNull = true;
    return 0;
  }

  int64_t ret = 0;
  switch (valType)
  {
    case JSON_VALUE_NUMBER:
    case JSON_VALUE_STRING:
    {
      char* end;
      int err;
      ret = getCharset(fp[0])->strntoll(retJS.data(), retJS.size(), 10, &end, &err);
      break;
    }
    case JSON_VALUE_TRUE: ret = 1; break;
    default: break;
  }

  return ret;
}
// Floating-point view of the extraction result.
//
// Number and string results are converted with the charset's strntod() applied
// to the text doExtract() returned; a JSON `true` yields 1.0; every other type
// yields 0.0. When extraction fails the result is NULL: isNull is set (as
// getStrVal does) and 0.0 is returned.
double Func_json_extract::getDoubleVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                       execplan::CalpontSystemCatalog::ColType& type)
{
  string retJS;
  json_value_types valType;

  if (doExtract(row, fp, &valType, retJS, false) != 0)
  {
    // Report the failure instead of silently returning a non-NULL 0.0.
    isNull = true;
    return 0.0;
  }

  double ret = 0.0;
  switch (valType)
  {
    case JSON_VALUE_NUMBER:
    case JSON_VALUE_STRING:
    {
      char* end;
      int err;
      ret = getCharset(fp[0])->strntod(retJS.data(), retJS.size(), &end, &err);
      break;
    }
    case JSON_VALUE_TRUE: ret = 1.0; break;
    default: break;
  }

  return ret;
}
// Decimal view of the extraction result.
//
// For number and string results the decimal value of the first argument is
// returned; a JSON `true` yields IDB_Decimal(1, 0, 1); every other type yields
// IDB_Decimal(0, 0, 1). When extraction fails the result is NULL: isNull is
// set (as getStrVal does) and IDB_Decimal(0, 0, 1) is returned.
//
// NOTE(review): the number/string branch asks fp[0] (the whole document
// argument) for its decimal value rather than converting the extracted value -
// confirm this is intended.
execplan::IDB_Decimal Func_json_extract::getDecimalVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                                       execplan::CalpontSystemCatalog::ColType& type)
{
  json_value_types valType;
  string retJS;

  if (doExtract(row, fp, &valType, retJS, false) != 0)
  {
    // Report the failure instead of silently returning a non-NULL zero.
    isNull = true;
    return IDB_Decimal(0, 0, 1);
  }

  switch (valType)
  {
    case JSON_VALUE_STRING:
    case JSON_VALUE_NUMBER: return fp[0]->data()->getDecimalVal(row, isNull);
    case JSON_VALUE_TRUE: return IDB_Decimal(1, 0, 1);
    case JSON_VALUE_OBJECT:
    case JSON_VALUE_ARRAY:
    case JSON_VALUE_FALSE:
    case JSON_VALUE_NULL:
    case JSON_VALUE_UNINITIALIZED: break;
  }

  return IDB_Decimal(0, 0, 1);
}
} // namespace funcexp

View File

@ -0,0 +1,61 @@
#include <string_view>
using namespace std;
#include "functor_json.h"
#include "functioncolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// The operation type is the result type of the first argument (the JSON document).
CalpontSystemCatalog::ColType Func_json_format::operationType(FunctionParm& fp,
                                                              CalpontSystemCatalog::ColType& resultType)
{
  const auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
// Re-formats the JSON document in fp[0] according to this functor's `fmt`.
//
// In DETAILED mode an optional second argument gives the tab size; it defaults
// to 4 and is clamped to the range [0, TAB_SIZE_LIMIT].
//
// Sets isNull and returns "" when the document or the tab size argument is
// NULL, or when formatting fails.
string Func_json_format::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                   execplan::CalpontSystemCatalog::ColType& type)
{
  const string_view js = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return "";

  int tabSize = 4;

  if (fmt == DETAILED && fp.size() > 1)
  {
    // Clamp in the argument's own integer type and narrow to int only
    // afterwards, so an out-of-int-range value cannot wrap around before the
    // range check is applied.
    const auto requested = fp[1]->data()->getIntVal(row, isNull);
    if (isNull)
      return "";

    if (requested < 0)
      tabSize = 0;
    else if (requested > TAB_SIZE_LIMIT)
      tabSize = TAB_SIZE_LIMIT;
    else
      tabSize = static_cast<int>(requested);
  }

  json_engine_t jsEg;
  initJSEngine(jsEg, getCharset(fp[0]), js);

  string ret;
  if (doFormat(&jsEg, ret, fmt, tabSize))
  {
    isNull = true;
    return "";
  }

  isNull = false;
  return ret;
}
} // namespace funcexp

View File

@ -0,0 +1,245 @@
#include "functor_json.h"
#include "functioncolumn.h"
#include "constantcolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "dataconvert.h"
using namespace dataconvert;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// The operation type is the result type of the first argument (the JSON document).
CalpontSystemCatalog::ColType Func_json_insert::operationType(FunctionParm& fp,
                                                              CalpontSystemCatalog::ColType& resultType)
{
  const auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
// Evaluates the function for one row; `mode` selects INSERT, REPLACE or SET.
//
// fp[0] is the JSON document; the remaining arguments are consumed in
// (path, value) pairs. For each pair the parent of the path's last step is
// located, then:
//  - if the addressed item/key exists it is replaced (REPLACE and SET only);
//  - otherwise the value is added (INSERT and SET only), auto-wrapping a
//    non-array parent into an array when an array index is addressed.
// Pairs that do not apply are skipped and leave the document unchanged. The
// output of one pair is the input of the next, and the final text is
// re-emitted through doFormat() in LOOSE mode.
//
// Sets isNull and returns "" on a NULL document, an invalid path, a JSON error
// or a value that cannot be serialised.
string Func_json_insert::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                   execplan::CalpontSystemCatalog::ColType& type)
{
  const string_view js = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return "";

  // SET behaves as both INSERT and REPLACE.
  const bool isInsertMode = mode == INSERT || mode == SET;
  const bool isReplaceMode = mode == REPLACE || mode == SET;

  json_engine_t jsEg;

  int jsErr = 0;
  json_string_t keyName;
  const CHARSET_INFO* cs = getCharset(fp[0]);
  json_string_set_cs(&keyName, cs);

  initJSPaths(paths, fp, 1, 2);

  // retJS receives the result of the current pair; tmpJS carries the document
  // from one pair to the next.
  string retJS;
  string tmpJS{js};
  for (size_t i = 1, j = 0; i < fp.size(); i += 2, j++)
  {
    const char* rawJS = tmpJS.data();
    const size_t jsLen = tmpJS.size();

    JSONPath& path = paths[j];
    const json_path_step_t* lastStep;
    const char* valEnd;

    if (!path.parsed)
    {
      if (parseJSPath(path, row, fp[i], false))
        goto error;

      // Drop the last step so the path locates the parent; the dropped step is
      // read back below as last_step + 1.
      path.p.last_step--;
    }

    initJSEngine(jsEg, cs, tmpJS);
    // No step left after dropping the last one: the path addresses the whole document.
    if (path.p.last_step < path.p.steps)
      goto v_found;

    if (path.p.last_step >= path.p.steps && locateJSPath(jsEg, path, &jsErr))
    {
      if (jsErr)
        goto error;
      continue;
    }

    if (json_read_value(&jsEg))
      goto error;

    lastStep = path.p.last_step + 1;
    if (lastStep->type & JSON_PATH_ARRAY)
    {
      IntType itemSize = 0;

      if (jsEg.value_type != JSON_VALUE_ARRAY)
      {
        // The parent is not an array: it is treated as a one-element array.
        const uchar* valStart = jsEg.value_begin;
        bool isArrAutoWrap;

        if (isInsertMode)
        {
          if (isReplaceMode)
            isArrAutoWrap = lastStep->n_item > 0;
          else
          {
            // INSERT: index 0 already exists (it is the value itself).
            if (lastStep->n_item == 0)
              continue;
            isArrAutoWrap = true;
          }
        }
        else
        {
          // REPLACE: only index 0 exists.
          if (lastStep->n_item)
            continue;
          isArrAutoWrap = false;
        }

        retJS.clear();
        /* Wrap the value as an array. */
        retJS.append(rawJS, (const char*)valStart - rawJS);
        if (isArrAutoWrap)
          retJS.append("[");

        if (jsEg.value_type == JSON_VALUE_OBJECT)
        {
          if (json_skip_level(&jsEg))
            goto error;
        }

        if (isArrAutoWrap)
        {
          // Keep the existing value as the first element. The separator belongs
          // to this case only: without wrapping, the new value replaces the old
          // one outright and a leading ", " would produce invalid JSON.
          retJS.append((const char*)valStart, jsEg.s.c_str - valStart);
          retJS.append(", ");
        }
        if (appendJSValue(retJS, cs, row, fp[i + 1]))
          goto error;
        if (isArrAutoWrap)
          retJS.append("]");
        retJS.append((const char*)jsEg.s.c_str, rawJS + jsLen - (const char*)jsEg.s.c_str);

        goto continue_point;
      }

      // Walk the array looking for the item at the requested index.
      while (json_scan_next(&jsEg) == 0 && jsEg.state != JST_ARRAY_END)
      {
        switch (jsEg.state)
        {
          case JST_VALUE:
            if (itemSize == lastStep->n_item)
              goto v_found;
            itemSize++;
            if (json_skip_array_item(&jsEg))
              goto error;
            break;
          default: break;
        }
      }

      if (unlikely(jsEg.s.error))
        goto error;

      // The index is past the end: only the inserting modes append.
      if (!isInsertMode)
        continue;

      // Insertion point: the engine position stepped back by sav_c_len.
      valEnd = (const char*)(jsEg.s.c_str - jsEg.sav_c_len);
      retJS.clear();
      retJS.append(rawJS, valEnd - rawJS);
      if (itemSize > 0)
        retJS.append(", ");
      if (appendJSValue(retJS, cs, row, fp[i + 1]))
        goto error;
      retJS.append(valEnd, rawJS + jsLen - valEnd);
    }
    else /*JSON_PATH_KEY*/
    {
      IntType keySize = 0;

      if (jsEg.value_type != JSON_VALUE_OBJECT)
        continue;

      // Walk the object's keys looking for the requested one.
      while (json_scan_next(&jsEg) == 0 && jsEg.state != JST_OBJ_END)
      {
        switch (jsEg.state)
        {
          case JST_KEY:
            json_string_set_str(&keyName, lastStep->key, lastStep->key_end);
            if (json_key_matches(&jsEg, &keyName))
              goto v_found;
            keySize++;
            if (json_skip_key(&jsEg))
              goto error;
            break;
          default: break;
        }
      }

      if (unlikely(jsEg.s.error))
        goto error;

      // The key does not exist: only the inserting modes add it.
      if (!isInsertMode)
        continue;

      // Insertion point: the engine position stepped back by sav_c_len.
      valEnd = (const char*)(jsEg.s.c_str - jsEg.sav_c_len);

      retJS.clear();
      retJS.append(rawJS, valEnd - rawJS);

      if (keySize > 0)
        retJS.append(", ");

      retJS.append("\"");
      retJS.append((const char*)lastStep->key, lastStep->key_end - lastStep->key);
      retJS.append("\":");

      if (appendJSValue(retJS, cs, row, fp[i + 1]))
        goto error;
      retJS.append(valEnd, rawJS + jsLen - valEnd);
    }

    goto continue_point;

  v_found:
    // The addressed value exists: only the replacing modes touch it.
    if (!isReplaceMode)
      continue;

    if (json_read_value(&jsEg))
      goto error;

    valEnd = (const char*)jsEg.value_begin;
    retJS.clear();
    if (!json_value_scalar(&jsEg))
    {
      if (json_skip_level(&jsEg))
        goto error;
    }

    // Prefix, new value, then everything after the replaced value.
    retJS.append(rawJS, valEnd - rawJS);
    if (appendJSValue(retJS, cs, row, fp[i + 1]))
      goto error;
    retJS.append((const char*)jsEg.s.c_str, rawJS + jsLen - (const char*)jsEg.s.c_str);

  continue_point:
    // tmpJS keeps the rewritten JSON text as the input for the next (path, value) pair.
    tmpJS.swap(retJS);
    retJS.clear();
  }

  // Re-parse the final text and emit it in LOOSE format.
  initJSEngine(jsEg, cs, tmpJS);
  retJS.clear();
  if (doFormat(&jsEg, retJS, Func_json_format::LOOSE))
    goto error;

  isNull = false;
  return retJS;

error:
  isNull = true;
  return "";
}
} // namespace funcexp

View File

@ -0,0 +1,130 @@
#include "functor_json.h"
#include "functioncolumn.h"
#include "constantcolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "dataconvert.h"
using namespace dataconvert;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace
{
// Tells whether `key` (keyLen raw bytes, exactly as they appear between the
// quotes in the JSON text) is already present in the partially built key list
// `res`.
//
// `res` is expected to look like `["k1", "k2"` - an opening '[' followed by
// quoted entries separated by `, `, with no closing ']' yet. Entries are
// compared byte-for-byte in their raw (still escaped) form.
//
// Each entry's end is found by skipping backslash escapes, so a key containing
// an escaped quote (\") is treated as one entry, and an empty key ("") can be
// matched as well. Scanning never reads beyond res.size().
bool checkKeyInList(const string& res, const uchar* key, const int keyLen)
{
  if (keyLen < 0)
    return false;

  const size_t wanted = static_cast<size_t>(keyLen);
  const size_t size = res.size();

  size_t pos = 2; /* skip the beginning '["' */
  while (pos < size)
  {
    // Find the closing quote of the current entry, stepping over escapes.
    size_t stop = pos;
    while (stop < size && res[stop] != '"')
      stop += (res[stop] == '\\' && stop + 1 < size) ? 2 : 1;

    if (stop >= size)
      break; /* unterminated entry: nothing more to compare */

    const size_t entryLen = stop - pos;
    if (entryLen == wanted && res.compare(pos, entryLen, (const char*)key, wanted) == 0)
      return true;

    pos = stop + 4; /* skip '", "' to the first character of the next entry */
  }

  return false;
}
} // namespace
namespace funcexp
{
// The operation type is the result type of the first argument (the JSON document).
CalpontSystemCatalog::ColType Func_json_keys::operationType(FunctionParm& fp,
                                                            CalpontSystemCatalog::ColType& resultType)
{
  const auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
// Returns the keys of a JSON object as a JSON array of strings, e.g.
// ["a", "b"]. The object is the document in fp[0] or, when fp[1] is given, the
// value addressed by that path. Keys already emitted are not repeated.
//
// Sets isNull and returns "" when the document is NULL, the path cannot be
// parsed or located, the addressed value is not an object, or the JSON is
// malformed.
string Func_json_keys::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                 execplan::CalpontSystemCatalog::ColType& type)
{
  const string_view js = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return "";

  IntType keySize = 0;
  string ret;
  json_engine_t jsEg;
  initJSEngine(jsEg, getCharset(fp[0]), js);

  if (fp.size() > 1)
  {
    if (!path.parsed && parseJSPath(path, row, fp[1], false))
      goto error;

    if (locateJSPath(jsEg, path))
      goto error;
  }

  if (json_read_value(&jsEg))
    goto error;

  if (jsEg.value_type != JSON_VALUE_OBJECT)
    goto error;

  ret.append("[");
  while (json_scan_next(&jsEg) == 0 && jsEg.state != JST_OBJ_END)
  {
    const uchar *keyStart, *keyEnd;
    int keyLen;

    switch (jsEg.state)
    {
      case JST_KEY:
        keyStart = jsEg.s.c_str;
        // Consume the key name; keyEnd ends up at the position before the last read.
        do
        {
          keyEnd = jsEg.s.c_str;
        } while (json_read_keyname_chr(&jsEg) == 0);

        if (unlikely(jsEg.s.error))
          goto error;

        keyLen = (int)(keyEnd - keyStart);

        // Emit each distinct key only once.
        if (!checkKeyInList(ret, keyStart, keyLen))
        {
          if (keySize > 0)
            ret.append(", ");
          ret.append("\"");
          ret.append((const char*)keyStart, keyLen);
          ret.append("\"");
          keySize++;
        }
        break;
      case JST_OBJ_START:
      case JST_ARRAY_START:
        // Nested containers contribute no keys; a failure here is picked up by
        // the error check after the loop.
        if (json_skip_level(&jsEg))
          break;
        break;
      default: break;
    }
  }

  // An error is the exceptional outcome, so the hint belongs on this branch
  // rather than on the success path.
  if (unlikely(jsEg.s.error))
    goto error;

  ret.append("]");
  return ret;

error:
  isNull = true;
  return "";
}
} // namespace funcexp

View File

@ -0,0 +1,81 @@
#include "functor_json.h"
#include "functioncolumn.h"
#include "constantcolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "dataconvert.h"
using namespace dataconvert;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// JSON_LENGTH takes its operation type from the first (JSON document) argument.
CalpontSystemCatalog::ColType Func_json_length::operationType(FunctionParm& fp,
                                                              CalpontSystemCatalog::ColType& resultType)
{
  auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
/**
 * JSON_LENGTH(json[, path]): number of top-level items of the addressed value.
 *
 * A scalar counts as 1; for an object or array every JST_KEY / JST_VALUE seen at the top
 * level is counted and nested containers are skipped as a whole.
 *
 * @param row    Current row, used to evaluate the arguments.
 * @param fp     fp[0] is the JSON document, optional fp[1] a JSON path.
 * @param isNull Set to true on NULL input, path failure or malformed JSON.
 * @param op_ct  Unused here.
 * @return The item count, or 0 together with isNull = true.
 */
int64_t Func_json_length::getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                    execplan::CalpontSystemCatalog::ColType& op_ct)
{
  const string_view jsonText = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return 0;

  // Single failure exit: flag the result as NULL and hand back 0.
  const auto fail = [&isNull]() -> int64_t
  {
    isNull = true;
    return 0;
  };

  json_engine_t engine;
  initJSEngine(engine, getCharset(fp[0]), jsonText);

  if (fp.size() > 1)
  {
    if (!path.parsed && parseJSPath(path, row, fp[1], false))
      return fail();
    if (locateJSPath(engine, path))
      return fail();
  }

  if (json_read_value(&engine))
    return fail();
  if (json_value_scalar(&engine))
    return 1;

  int itemCount = 0;
  int scanErr;
  for (;;)
  {
    scanErr = json_scan_next(&engine);
    if (scanErr || engine.state == JST_OBJ_END || engine.state == JST_ARRAY_END)
      break;

    if (engine.state == JST_VALUE || engine.state == JST_KEY)
    {
      ++itemCount;
    }
    else if (engine.state == JST_OBJ_START || engine.state == JST_ARRAY_START)
    {
      // Children of a nested container do not count towards this level.
      if (json_skip_level(&engine))
        return fail();
    }
  }

  if (!scanErr)
  {
    // Drain the remaining input purely to validate the rest of the document.
    while (json_scan_next(&engine) == 0)
    {
    }
  }

  if (likely(!engine.s.error))
    return itemCount;
  return fail();
}
} // namespace funcexp

View File

@ -0,0 +1,260 @@
#include "functor_json.h"
#include "functioncolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace
{
// Merges the JSON value at the head of jsEg2 into the value at the head of jsEg1 and appends
// the merged text to retJS (JSON_MERGE semantics as implemented below):
//  - object + object: the result is an object; keys found in both inputs have their values
//    merged recursively, keys found in only one input are copied verbatim;
//  - any other combination: both values are concatenated into a single array, wrapping
//    non-array operands as needed.
// Both engines are advanced past the values they were positioned on.
// Returns 0 on success and a non-zero code (1 or 2) when reading or skipping a value fails;
// the caller in this file only tests the result for non-zero.
int doMerge(string& retJS, json_engine_t* jsEg1, json_engine_t* jsEg2)
{
if (json_read_value(jsEg1) || json_read_value(jsEg2))
return 1;
if (jsEg1->value_type == JSON_VALUE_OBJECT && jsEg2->value_type == JSON_VALUE_OBJECT)
{
// Snapshots taken right after the opening '{' so each object can be rescanned from its start.
json_engine_t savJSEg1 = *jsEg1;
json_engine_t savJSEg2 = *jsEg2;
int firstKey = 1;
json_string_t keyName;
json_string_set_cs(&keyName, jsEg1->s.cs);
retJS.append("{");
while (json_scan_next(jsEg1) == 0 && jsEg1->state != JST_OBJ_END)
{
const uchar *keyStart, *keyEnd;
/* Pass 1: walk the Json_1 keys and look each of them up among the Json_2 keys. */
DBUG_ASSERT(jsEg1->state == JST_KEY);
keyStart = jsEg1->s.c_str;
// Consume the key name; keyEnd keeps the position recorded before the final read.
do
{
keyEnd = jsEg1->s.c_str;
} while (json_read_keyname_chr(jsEg1) == 0);
if (unlikely(jsEg1->s.error))
return 1;
if (firstKey)
firstKey = 0;
else
{
retJS.append(", ");
// Rewind Json_2 so the lookup for this key starts at its first member again.
*jsEg2 = savJSEg2;
}
retJS.append("\"");
retJS.append((const char*)keyStart, (size_t)(keyEnd - keyStart));
retJS.append("\":");
while (json_scan_next(jsEg2) == 0 && jsEg2->state != JST_OBJ_END)
{
int ires;
DBUG_ASSERT(jsEg2->state == JST_KEY);
json_string_set_str(&keyName, keyStart, keyEnd);
if (!json_key_matches(jsEg2, &keyName))
{
if (jsEg2->s.error || json_skip_key(jsEg2))
return 2;
continue;
}
/* Json_2 has same key as Json_1. Merge them. */
if ((ires = doMerge(retJS, jsEg1, jsEg2)))
return ires;
goto merged_j1;
}
if (unlikely(jsEg2->s.error))
return 2;
keyStart = jsEg1->s.c_str;
/* Key exists only in Json_1: copy its value text as is. */
if (json_skip_key(jsEg1))
return 1;
retJS.append((const char*)keyStart, jsEg1->s.c_str - keyStart);
merged_j1:
continue;
}
*jsEg2 = savJSEg2;
/*
Pass 2: walk the Json_2 keys.
Skip those that also exist in Json_1 (already emitted by pass 1).
*/
while (json_scan_next(jsEg2) == 0 && jsEg2->state != JST_OBJ_END)
{
const uchar *keyStart, *keyEnd;
DBUG_ASSERT(jsEg2->state == JST_KEY);
keyStart = jsEg2->s.c_str;
do
{
keyEnd = jsEg2->s.c_str;
} while (json_read_keyname_chr(jsEg2) == 0);
if (unlikely(jsEg2->s.error))
return 1;
// Search Json_1 from its beginning for the current Json_2 key.
*jsEg1 = savJSEg1;
while (json_scan_next(jsEg1) == 0 && jsEg1->state != JST_OBJ_END)
{
DBUG_ASSERT(jsEg1->state == JST_KEY);
json_string_set_str(&keyName, keyStart, keyEnd);
if (!json_key_matches(jsEg1, &keyName))
{
if (unlikely(jsEg1->s.error || json_skip_key(jsEg1)))
return 2;
continue;
}
// Duplicate key: drop the Json_2 value and leave the Json_1 object.
if (json_skip_key(jsEg2) || json_skip_level(jsEg1))
return 1;
goto continue_j2;
}
if (unlikely(jsEg1->s.error))
return 2;
if (firstKey)
firstKey = 0;
else
retJS.append(", ");
if (json_skip_key(jsEg2))
return 1;
// Copy everything from the key name up to the end of its value in one go; only the
// opening quote has to be re-added because keyStart points just past it.
retJS.append("\"");
retJS.append((const char*)keyStart, jsEg2->s.c_str - keyStart);
continue_j2:
continue;
}
retJS.append("}");
}
else
{
const uchar *end1, *beg1, *end2, *beg2;
// Item counters; they stay at 1 unless the operand is an array that gets counted below.
int itemSize1 = 1, itemSize2 = 1;
beg1 = jsEg1->value_begin;
/* Merge as a single array. */
if (jsEg1->value_type == JSON_VALUE_ARRAY)
{
if (json_skip_level_and_count(jsEg1, &itemSize1))
return 1;
// Stop before the last character read so the array's closing ']' is left out.
end1 = jsEg1->s.c_str - jsEg1->sav_c_len;
}
else
{
// A non-array first operand becomes the first element of a new array.
retJS.append("[");
if (jsEg1->value_type == JSON_VALUE_OBJECT)
{
if (json_skip_level(jsEg1))
return 1;
end1 = jsEg1->s.c_str;
}
else
end1 = jsEg1->value_end;
}
retJS.append((const char*)beg1, end1 - beg1);
if (json_value_scalar(jsEg2))
{
beg2 = jsEg2->value_begin;
end2 = jsEg2->value_end;
}
else
{
if (jsEg2->value_type == JSON_VALUE_OBJECT)
{
beg2 = jsEg2->value_begin;
if (json_skip_level(jsEg2))
return 2;
}
else
{
// Array operand: start at the current scan position (value_begin is not used here, so
// the opening '[' is presumably already consumed) and keep the text through ']'.
beg2 = jsEg2->s.c_str;
if (json_skip_level_and_count(jsEg2, &itemSize2))
return 2;
}
end2 = jsEg2->s.c_str;
}
// No separator when either side is an empty array.
if (itemSize1 && itemSize2)
retJS.append(", ");
retJS.append((const char*)beg2, end2 - beg2);
// An array second operand already supplied the closing ']'.
if (jsEg2->value_type != JSON_VALUE_ARRAY)
retJS.append("]");
}
return 0;
}
} // namespace
namespace funcexp
{
// JSON_MERGE inherits its operation type from the first document argument.
CalpontSystemCatalog::ColType Func_json_merge::operationType(FunctionParm& fp,
                                                             CalpontSystemCatalog::ColType& resultType)
{
  auto& firstDoc = fp[0];
  return firstDoc->data()->resultType();
}
/**
 * JSON_MERGE(json, json[, json...]): folds all arguments left to right with doMerge() and
 * returns the result formatted in LOOSE mode.
 *
 * @param row    Current row, used to evaluate the arguments.
 * @param fp     Two or more JSON documents.
 * @param isNull Set to true when any argument is NULL, a merge step fails, or the final
 *               formatting fails; set to false on success.
 * @param type   Unused here.
 * @return The merged document, or "" together with isNull = true.
 */
string Func_json_merge::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                  execplan::CalpontSystemCatalog::ColType& type)
{
  const string_view firstDoc = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return "";

  // Single failure exit: NULL result.
  const auto fail = [&isNull]() -> string
  {
    isNull = true;
    return "";
  };

  const CHARSET_INFO* js1CS = getCharset(fp[0]);
  json_engine_t lhsEg, rhsEg;
  string accumulated{firstDoc};  // merge result so far, input of the next step
  string merged;                 // output of the current step

  for (size_t argIdx = 1; argIdx < fp.size(); ++argIdx)
  {
    const string_view nextDoc = fp[argIdx]->data()->getStrVal(row, isNull);
    if (isNull)
      return fail();

    initJSEngine(lhsEg, js1CS, accumulated);
    initJSEngine(rhsEg, getCharset(fp[argIdx]), nextDoc);
    if (doMerge(merged, &lhsEg, &rhsEg))
      return fail();

    // The freshly merged text becomes the left operand of the next round.
    accumulated.swap(merged);
    merged.clear();
  }

  initJSEngine(lhsEg, js1CS, accumulated);
  merged.clear();
  if (doFormat(&lhsEg, merged, Func_json_format::LOOSE))
    return fail();

  isNull = false;
  return merged;
}
} // namespace funcexp

View File

@ -0,0 +1,308 @@
#include "functor_json.h"
#include "functioncolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace
{
// Appends the value the engine has just read to retJS, leaving out every object member
// whose value is JSON null (at any nesting depth of objects). Non-object values are copied
// verbatim. Returns 0 on success, 1 when reading or skipping a value fails.
int copyValuePatch(string& retJS, json_engine_t* jsEg)
{
  if (jsEg->value_type == JSON_VALUE_OBJECT)
  {
    bool needSeparator = false;
    retJS.append("{");
    while (json_scan_next(jsEg) == 0 && jsEg->state != JST_OBJ_END)
    {
      /* Every step at this level is expected to land on a member key. */
      DBUG_ASSERT(jsEg->state == JST_KEY);
      const uchar* memberStart = jsEg->s.c_str;

      if (json_read_value(jsEg))
        return 1;
      // Null members are dropped from the copy.
      if (jsEg->value_type == JSON_VALUE_NULL)
        continue;

      if (needSeparator)
        retJS.append(", ");
      needSeparator = true;

      // Re-add the opening quote, then copy the raw text from the key up to the value start.
      retJS.append("\"");
      retJS.append((const char*)memberStart, jsEg->value_begin - memberStart);
      if (copyValuePatch(retJS, jsEg))
        return 1;
    }
    retJS.append("}");
    return 0;
  }

  /* Scalar or array: copy the source text unchanged. */
  const uchar* first = jsEg->value_begin;
  const uchar* last;
  if (json_value_scalar(jsEg))
  {
    last = jsEg->value_end;
  }
  else
  {
    if (json_skip_level(jsEg))
      return 1;
    last = jsEg->s.c_str;
  }
  retJS.append((const char*)first, last - first);
  return 0;
}
/**
 * Applies the patch at the head of jsEg2 to the target at the head of jsEg1 and appends the
 * outcome to retJS (JSON_MERGE_PATCH semantics as implemented below):
 *  - object patched by object: members are merged key by key; a patch member whose value is
 *    null removes the key, other patch members replace or add it;
 *  - otherwise the patch value replaces the target, copied through copyValuePatch().
 *
 * @param retJS   Output buffer the result is appended to.
 * @param jsEg1   Engine positioned on the target value; advanced past it.
 * @param jsEg2   Engine positioned on the patch value; advanced past it.
 * @param isEmpty Set to true when the patch is a bare null (nothing was appended), false when
 *                a value was produced. Left untouched if the initial reads fail.
 * @return 0 on success, a non-zero code (1 or 2) on a read/skip failure.
 */
int doMergePatch(string& retJS, json_engine_t* jsEg1, json_engine_t* jsEg2, bool& isEmpty)
{
  if (json_read_value(jsEg1) || json_read_value(jsEg2))
    return 1;

  if (jsEg1->value_type == JSON_VALUE_OBJECT && jsEg2->value_type == JSON_VALUE_OBJECT)
  {
    // Snapshots taken right after the opening '{' so each object can be rescanned from its start.
    json_engine_t savJSEg1 = *jsEg1;
    json_engine_t savJSEg2 = *jsEg2;
    int firstKey = 1;
    json_string_t keyName;
    size_t savLen;
    bool mrgEmpty = false;
    isEmpty = false;
    json_string_set_cs(&keyName, jsEg1->s.cs);
    retJS.append("{");

    /* Pass 1: walk the target keys and look each of them up in the patch. */
    while (json_scan_next(jsEg1) == 0 && jsEg1->state != JST_OBJ_END)
    {
      const uchar *keyStart, *keyEnd;
      DBUG_ASSERT(jsEg1->state == JST_KEY);
      keyStart = jsEg1->s.c_str;
      // Consume the key name; keyEnd keeps the position recorded before the final read.
      do
      {
        keyEnd = jsEg1->s.c_str;
      } while (json_read_keyname_chr(jsEg1) == 0);
      if (jsEg1->s.error)
        return 1;

      // Remember where this member starts so it can be rolled back if the patch deletes it.
      savLen = retJS.size();
      if (!firstKey)
        retJS.append(", ");
      // Always search the patch from its first member. Rewinding only after a member had been
      // emitted left the patch engine mid-object whenever the leading target keys were removed,
      // so later target keys missed patch members located before that position.
      *jsEg2 = savJSEg2;

      retJS.append("\"");
      retJS.append((const char*)keyStart, keyEnd - keyStart);
      retJS.append("\":");

      while (json_scan_next(jsEg2) == 0 && jsEg2->state != JST_OBJ_END)
      {
        int ires;
        DBUG_ASSERT(jsEg2->state == JST_KEY);
        json_string_set_str(&keyName, keyStart, keyEnd);
        if (!json_key_matches(jsEg2, &keyName))
        {
          if (jsEg2->s.error || json_skip_key(jsEg2))
            return 2;
          continue;
        }
        /* The patch has the same key as the target. Merge the two values. */
        if ((ires = doMergePatch(retJS, jsEg1, jsEg2, mrgEmpty)))
          return ires;
        if (mrgEmpty)
          retJS.resize(savLen);  // null patch value: drop the separator and key written above
        else
          firstKey = 0;
        goto merged_j1;
      }
      if (jsEg2->s.error)
        return 2;

      keyStart = jsEg1->s.c_str;
      /* Key is not mentioned by the patch: copy the target value as is. */
      if (json_skip_key(jsEg1))
        return 1;
      retJS.append((const char*)keyStart, jsEg1->s.c_str - keyStart);
      firstKey = 0;
    merged_j1:
      continue;
    }

    *jsEg2 = savJSEg2;
    /*
      Pass 2: walk the patch keys.
      Skip those that also exist in the target (already handled by pass 1).
    */
    while (json_scan_next(jsEg2) == 0 && jsEg2->state != JST_OBJ_END)
    {
      const uchar *keyStart, *keyEnd;
      DBUG_ASSERT(jsEg2->state == JST_KEY);
      keyStart = jsEg2->s.c_str;
      do
      {
        keyEnd = jsEg2->s.c_str;
      } while (json_read_keyname_chr(jsEg2) == 0);
      if (jsEg2->s.error)
        return 1;

      // Search the target from its beginning for the current patch key.
      *jsEg1 = savJSEg1;
      while (json_scan_next(jsEg1) == 0 && jsEg1->state != JST_OBJ_END)
      {
        DBUG_ASSERT(jsEg1->state == JST_KEY);
        json_string_set_str(&keyName, keyStart, keyEnd);
        if (!json_key_matches(jsEg1, &keyName))
        {
          if (jsEg1->s.error || json_skip_key(jsEg1))
            return 2;
          continue;
        }
        // Key exists in both: skip the patch value and leave the target object.
        if (json_skip_key(jsEg2) || json_skip_level(jsEg1))
          return 1;
        goto continue_j2;
      }
      if (jsEg1->s.error)
        return 2;

      savLen = retJS.size();
      if (!firstKey)
        retJS.append(", ");
      retJS.append("\"");
      retJS.append((const char*)keyStart, keyEnd - keyStart);
      retJS.append("\":");
      if (json_read_value(jsEg2))
        return 1;
      if (jsEg2->value_type == JSON_VALUE_NULL)
        retJS.resize(savLen);  // deleting a key the target never had: emit nothing
      else
      {
        if (copyValuePatch(retJS, jsEg2))
          return 1;
        firstKey = 0;
      }
    continue_j2:
      continue;
    }
    retJS.append("}");
  }
  else
  {
    // The target is replaced wholesale: step over it, then copy the patch value.
    if (!json_value_scalar(jsEg1) && json_skip_level(jsEg1))
      return 1;
    isEmpty = (jsEg2->value_type == JSON_VALUE_NULL);
    if (!isEmpty && copyValuePatch(retJS, jsEg2))
      return 1;
  }
  return 0;
}
} // namespace
namespace funcexp
{
// JSON_MERGE_PATCH inherits its operation type from the first document argument.
CalpontSystemCatalog::ColType Func_json_merge_patch::operationType(FunctionParm& fp,
                                                                   CalpontSystemCatalog::ColType& resultType)
{
  auto& targetDoc = fp[0];
  return targetDoc->data()->resultType();
}
/**
 * JSON_MERGE_PATCH(json, json[, json...]): applies each following argument as a patch to the
 * result accumulated so far and returns the outcome formatted in LOOSE mode.
 *
 * NULL handling: a NULL argument marks the accumulated result as NULL (hasNullArg). That
 * state is kept while subsequent patches are objects and is cleared by the next non-object
 * patch, which then simply becomes the new accumulated result. If the state is still set
 * after the last argument, the function returns NULL.
 *
 * @param row    Current row, used to evaluate the arguments.
 * @param fp     Target document followed by one or more patches.
 * @param isNull Set to true for a NULL result or on malformed JSON; false on success.
 * @param type   Unused here.
 * @return The patched document, or "" together with isNull = true.
 */
string Func_json_merge_patch::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                        execplan::CalpontSystemCatalog::ColType& type)
{
  bool isEmpty = false, hasNullArg = false;
  const string_view js = fp[0]->data()->getStrVal(row, isNull);
  hasNullArg = isNull;
  if (isNull)
    isNull = false;

  json_engine_t jsEg1, jsEg2;
  jsEg1.s.error = jsEg2.s.error = 0;
  string tmpJS{js};
  string retJS;

  for (size_t i = 1; i < fp.size(); i++)
  {
    const string_view js2 = fp[i]->data()->getStrVal(row, isNull);
    if (isNull)
    {
      hasNullArg = true;
      isNull = false;
      goto next;
    }

    initJSEngine(jsEg2, getCharset(fp[i]), js2);
    if (hasNullArg)
    {
      if (json_read_value(&jsEg2))
        goto error;
      // An object patch applied to NULL keeps the result NULL.
      if (jsEg2.value_type == JSON_VALUE_OBJECT)
        goto next;
      // Any other patch replaces the NULL outright. Append by explicit length: a string_view
      // is not guaranteed to be NUL-terminated, so data() must not be used as a C string.
      hasNullArg = false;
      retJS.append(js2.data(), js2.size());
      goto next;
    }

    initJSEngine(jsEg1, getCharset(fp[0]), tmpJS);
    if (doMergePatch(retJS, &jsEg1, &jsEg2, isEmpty))
      goto error;
    // A bare null patch produced no text; the result of this step is the JSON literal null.
    if (isEmpty)
      retJS.append("null");

  next:
    // tmpJS saves the merge result for the next loop
    tmpJS.swap(retJS);
    retJS.clear();
  }

  if (hasNullArg)
    goto error;

  initJSEngine(jsEg1, getCharset(fp[0]), tmpJS);
  retJS.clear();
  if (doFormat(&jsEg1, retJS, Func_json_format::LOOSE))
    goto error;
  isNull = false;
  return retJS;

error:
  isNull = true;
  return "";
}
} // namespace funcexp

View File

@ -0,0 +1,47 @@
#include <string_view>
using namespace std;
#include "functor_json.h"
#include "functioncolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// JSON_NORMALIZE inherits its operation type from the document argument.
CalpontSystemCatalog::ColType Func_json_normalize::operationType(FunctionParm& fp,
                                                                 CalpontSystemCatalog::ColType& resultType)
{
  auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
/**
 * JSON_NORMALIZE(json): returns the normalized form of the document as produced by
 * json_normalize().
 *
 * @param row    Current row, used to evaluate the argument.
 * @param fp     fp[0] is the JSON document.
 * @param isNull Set to true when the argument is NULL or normalization fails.
 * @param type   Unused here.
 * @return The normalized text, or "" together with isNull = true.
 */
string Func_json_normalize::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                      execplan::CalpontSystemCatalog::ColType& type)
{
  const string_view js = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return "";

  // The DYNAMIC_STRING header lives on the stack. dynstr_free() is used as the guard's deleter
  // to release the character buffer on every exit path; the header itself must not be heap
  // allocated, because nothing would ever delete it.
  DYNAMIC_STRING normalized{};
  using DynStrGuard = unique_ptr<DYNAMIC_STRING, decltype(&dynstr_free)>;
  const DynStrGuard guard{&normalized, dynstr_free};

  if (init_dynamic_string(&normalized, NULL, 0, 0) ||
      json_normalize(&normalized, js.data(), js.size(), getCharset(fp[0])) || normalized.str == nullptr)
  {
    isNull = true;
    return "";
  }

  // Copy by explicit length instead of relying on NUL termination.
  return string(normalized.str, normalized.length);
}
} // namespace funcexp

View File

@ -0,0 +1,54 @@
#include <string_view>
using namespace std;
#include "functor_json.h"
#include "functioncolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "mcs_datatype.h"
using namespace datatypes;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// JSON_OBJECT may be called without arguments; in that case the supplied result type is
// used, otherwise the type of the first argument.
CalpontSystemCatalog::ColType Func_json_object::operationType(FunctionParm& fp,
                                                              CalpontSystemCatalog::ColType& resultType)
{
  if (fp.size() > 0)
    return fp[0]->data()->resultType();
  return resultType;
}
/**
 * JSON_OBJECT([key, value[, key, value...]]): builds a JSON object text from key/value pairs.
 *
 * @param row    Current row, used to evaluate the arguments.
 * @param fp     Alternating key and value arguments; may be empty.
 * @param isNull Set to true when a key or value cannot be appended, or when the arguments do
 *               not form complete pairs.
 * @param type   Supplies the character set used when appending keys and values.
 * @return `{}` for no arguments, otherwise `{"k1": v1, "k2": v2, ...}`;
 *         "" together with isNull = true on failure.
 */
string Func_json_object::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                   execplan::CalpontSystemCatalog::ColType& type)
{
  if (fp.size() == 0)
    return "{}";

  // A trailing key without a value would make fp[i + 1] below index past the end.
  if (fp.size() % 2 != 0)
  {
    isNull = true;
    return "";
  }

  const CHARSET_INFO* retCS = type.getCharset();
  string ret("{");
  for (size_t i = 0; i < fp.size(); i += 2)
  {
    if (i > 0)
      ret.append(", ");
    if (appendJSKeyName(ret, retCS, row, fp[i]) || appendJSValue(ret, retCS, row, fp[i + 1]))
    {
      isNull = true;
      return "";
    }
  }
  ret.append("}");
  return ret;
}
} // namespace funcexp

View File

@ -0,0 +1,300 @@
#include "functor_json.h"
#include "functioncolumn.h"
#include "rowgroup.h"
using namespace execplan;
using namespace rowgroup;
#include "dataconvert.h"
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace
{
// Forward declarations: the overlap helpers below call each other recursively
// (e.g. jsonCmpWithArrAndObj -> checkOverlapsWithObj -> checkOverlaps -> checkOverlapsWithObj).
int checkOverlapsWithObj(json_engine_t* jsEg, json_engine_t* jsEg2, bool compareWhole);
bool checkOverlaps(json_engine_t* jsEg1, json_engine_t* jsEg2, bool compareWhole);
/*
  Once a comparison has been decided (match or mismatch) there may still be
  unread array elements or object members on either side. They are of no
  further interest, so both engines are moved past their current nesting level.
  Skip failures are deliberately ignored here.
*/
void jsonSkipCurrLevel(json_engine_t* jsEg1, json_engine_t* jsEg2)
{
  (void)json_skip_level(jsEg1);
  (void)json_skip_level(jsEg2);
}
/*
  Overlap test where at least one side is a scalar.
  - jsEg2 scalar: true when both values have the same type and, for numbers,
    differ by less than 1e-12, or, for strings, are byte-wise identical;
    other equal types (true/false/null) match on type alone.
  - jsEg2 array: true when any of its elements matches jsEg1 by the rule above.
  - anything else: false.
*/
bool checkOverlapsWithScalar(json_engine_t* jsEg1, json_engine_t* jsEg2)
{
  if (!json_value_scalar(jsEg2))
  {
    if (jsEg2->value_type != JSON_VALUE_ARRAY)
      return false;

    // Look for jsEg1's value among the array elements.
    while (json_scan_next(jsEg2) == 0 && jsEg2->state == JST_VALUE)
    {
      if (json_read_value(jsEg2))
        return false;
      if (jsEg1->value_type == jsEg2->value_type && checkOverlapsWithScalar(jsEg1, jsEg2))
        return true;
      // Step over nested containers so the scan stays at element level.
      if (!json_value_scalar(jsEg2))
        json_skip_level(jsEg2);
    }
    return false;
  }

  /* Scalar against scalar. */
  if (jsEg1->value_type != jsEg2->value_type)
    return false;

  switch (jsEg1->value_type)
  {
    case JSON_VALUE_NUMBER:
    {
      char* parseEnd;
      int parseErr;
      const double lhs = jsEg1->s.cs->strntod((char*)jsEg1->value, jsEg1->value_len, &parseEnd, &parseErr);
      const double rhs = jsEg2->s.cs->strntod((char*)jsEg2->value, jsEg2->value_len, &parseEnd, &parseErr);
      return (fabs(lhs - rhs) < 1e-12);
    }
    case JSON_VALUE_STRING:
      return jsEg2->value_len == jsEg1->value_len &&
             memcmp(jsEg2->value, jsEg1->value, jsEg2->value_len) == 0;
    default:
      // Same non-number, non-string scalar type: equal by type.
      return true;
  }
}
/*
  Array (jsEg1) against object (jsEg2): the object is searched for among the
  array elements. Every element that is itself an object is compared with
  jsEg2 as a whole; the first full match yields true. If no element matches
  the result is false.
*/
bool jsonCmpWithArrAndObj(json_engine_t* jsEg1, json_engine_t* jsEg2)
{
  // Snapshot of the object engine so each candidate is compared from the object's start.
  const json_engine_t objectStart = *jsEg2;

  for (;;)
  {
    if (json_scan_next(jsEg1) != 0 || jsEg1->state != JST_VALUE)
      break;
    if (json_read_value(jsEg1))
      return false;

    if (jsEg1->value_type == JSON_VALUE_OBJECT)
    {
      if (checkOverlapsWithObj(jsEg1, jsEg2, true))
        return true;
      *jsEg2 = objectStart;
    }

    if (!json_value_scalar(jsEg1))
      json_skip_level(jsEg1);
  }
  return false;
}
/*
  Compares two arrays element by element, in order. Any pair that differs in
  type or fails a whole-value comparison makes the result false. When the
  pairwise walk stops, the result is true only if jsEg2 stands at the end of
  its container. Both engines are moved past the current level before returning
  (except when reading a value fails).
*/
bool jsonCmpArrInOrder(json_engine_t* jsEg1, json_engine_t* jsEg2)
{
  for (;;)
  {
    if (json_scan_next(jsEg1) != 0 || json_scan_next(jsEg2) != 0)
      break;
    if (jsEg1->state != JST_VALUE || jsEg2->state != JST_VALUE)
      break;

    if (json_read_value(jsEg1) || json_read_value(jsEg2))
      return false;

    const bool sameType = (jsEg1->value_type == jsEg2->value_type);
    if (!sameType || !checkOverlaps(jsEg1, jsEg2, true))
    {
      jsonSkipCurrLevel(jsEg1, jsEg2);
      return false;
    }
  }

  const bool rhsAtEnd = (jsEg2->state == JST_ARRAY_END || jsEg2->state == JST_OBJ_END);
  jsonSkipCurrLevel(jsEg1, jsEg2);
  return rhsAtEnd;
}
// Overlap test where jsEg1 is positioned on an array.
//  - jsEg2 array, compareWhole: both arrays must match element by element, in order.
//  - jsEg2 array, !compareWhole: true as soon as any element of jsEg1 fully matches any
//    element of jsEg2 (nested loop over both arrays).
//  - jsEg2 object: never equal as a whole; otherwise the object is searched among the
//    elements of jsEg1.
//  - jsEg2 scalar: delegated to checkOverlapsWithScalar with the arguments swapped.
// Returns non-zero (true) on overlap, 0 (false) otherwise or when reading a value fails.
int checkOverlapsWithArr(json_engine_t* jsEg1, json_engine_t* jsEg2, bool compareWhole)
{
if (jsEg2->value_type == JSON_VALUE_ARRAY)
{
if (compareWhole)
return jsonCmpArrInOrder(jsEg1, jsEg2);
// locjsEg2ue: start of the jsEg2 array, restored for every element of jsEg1.
// currJSEg: state of jsEg1 right after reading the current element.
json_engine_t locjsEg2ue = *jsEg2, currJSEg = *jsEg1;
while (json_scan_next(jsEg1) == 0 && jsEg1->state == JST_VALUE)
{
if (json_read_value(jsEg1))
return false;
currJSEg = *jsEg1;
while (json_scan_next(jsEg2) == 0 && jsEg2->state == JST_VALUE)
{
if (json_read_value(jsEg2))
return false;
if (jsEg1->value_type == jsEg2->value_type)
{
int res1 = checkOverlaps(jsEg1, jsEg2, true);
if (res1)
return true;
}
else
{
if (!json_value_scalar(jsEg2))
json_skip_level(jsEg2);
}
// The comparison may have advanced jsEg1; put it back on the current element.
*jsEg1 = currJSEg;
}
// Rewind jsEg2 to the start of its array for the next element of jsEg1.
*jsEg2 = locjsEg2ue;
if (!json_value_scalar(jsEg1))
json_skip_level(jsEg1);
}
return false;
}
else if (jsEg2->value_type == JSON_VALUE_OBJECT)
{
if (compareWhole)
{
// An array can never equal an object as a whole.
jsonSkipCurrLevel(jsEg1, jsEg2);
return false;
}
return jsonCmpWithArrAndObj(jsEg1, jsEg2);
}
else
// jsEg2 is a scalar: let the scalar checker look for it inside the array.
return checkOverlapsWithScalar(jsEg2, jsEg1);
}
// Overlap test where jsEg1 is positioned on an object.
//  - jsEg2 object, !compareWhole: true as soon as one key of jsEg2 exists in jsEg1 with a
//    fully matching value.
//  - jsEg2 object, compareWhole: false as soon as a key of jsEg2 is missing from jsEg1 or
//    its value differs; true if the walk over jsEg2's keys finishes without such a mismatch.
//  - jsEg2 array: never equal as a whole; otherwise the object is searched among the array
//    elements (arguments swapped for jsonCmpWithArrAndObj).
//  - jsEg2 scalar: false.
// Returns non-zero (true) on overlap, 0 (false) otherwise or on a read error.
int checkOverlapsWithObj(json_engine_t* jsEg1, json_engine_t* jsEg2, bool compareWhole)
{
if (jsEg2->value_type == JSON_VALUE_OBJECT)
{
/* Find at least one common key-value pair */
json_string_t keyName;
bool foundKey = false, foundVal = false;
// Start of the jsEg1 object, restored before each key lookup.
json_engine_t locJSEg = *jsEg1;
const uchar *keyStart, *keyEnd;
json_string_set_cs(&keyName, jsEg2->s.cs);
while (json_scan_next(jsEg2) == 0 && jsEg2->state == JST_KEY)
{
keyStart = jsEg2->s.c_str;
// Consume the key name; keyEnd keeps the position recorded before the final read.
do
{
keyEnd = jsEg2->s.c_str;
} while (json_read_keyname_chr(jsEg2) == 0);
if (unlikely(jsEg2->s.error))
return false;
json_string_set_str(&keyName, keyStart, keyEnd);
foundKey = findKeyInObject(jsEg1, &keyName);
foundVal = 0;
if (foundKey)
{
if (json_read_value(jsEg1) || json_read_value(jsEg2))
return false;
/*
The value of a key-value pair can be anything. If it is an object
then we need to compare the whole value and if it is an array then
we need to compare the elements in that order. So set compareWhole
to true.
*/
if (jsEg1->value_type == jsEg2->value_type)
foundVal = checkOverlaps(jsEg1, jsEg2, true);
if (foundVal)
{
// One common pair is enough unless the objects must match entirely.
if (!compareWhole)
return true;
*jsEg1 = locJSEg;
}
else
{
if (compareWhole)
{
jsonSkipCurrLevel(jsEg1, jsEg2);
return false;
}
*jsEg1 = locJSEg;
}
}
else
{
if (compareWhole)
{
jsonSkipCurrLevel(jsEg1, jsEg2);
return false;
}
// Key not present in jsEg1: step over its value in jsEg2 and try the next key.
json_skip_key(jsEg2);
*jsEg1 = locJSEg;
}
}
jsonSkipCurrLevel(jsEg1, jsEg2);
return compareWhole ? true : false;
}
else if (jsEg2->value_type == JSON_VALUE_ARRAY)
{
if (compareWhole)
{
// An object can never equal an array as a whole.
jsonSkipCurrLevel(jsEg1, jsEg2);
return false;
}
return jsonCmpWithArrAndObj(jsEg2, jsEg1);
}
return false;
}
// Entry point of the overlap comparison: dispatches on the type of the value jsEg1 is
// positioned on. compareWhole requests full equality instead of "at least one common item".
bool checkOverlaps(json_engine_t* jsEg1, json_engine_t* jsEg2, bool compareWhole)
{
  if (jsEg1->value_type == JSON_VALUE_OBJECT)
    return checkOverlapsWithObj(jsEg1, jsEg2, compareWhole);
  if (jsEg1->value_type == JSON_VALUE_ARRAY)
    return checkOverlapsWithArr(jsEg1, jsEg2, compareWhole);
  return checkOverlapsWithScalar(jsEg1, jsEg2);
}
} // namespace
namespace funcexp
{
// JSON_OVERLAPS inherits its operation type from the first document argument.
CalpontSystemCatalog::ColType Func_json_overlaps::operationType(FunctionParm& fp,
                                                                CalpontSystemCatalog::ColType& resultType)
{
  auto& firstDoc = fp[0];
  return firstDoc->data()->resultType();
}
/**
 * JSON_OVERLAPS(json1, json2): true when the two documents share at least one item
 * (see checkOverlaps with compareWhole = false).
 *
 * Returns false, without touching isNull, when either argument is NULL, when either
 * document's first value cannot be read, or when a parse error was recorded during the
 * comparison.
 */
bool Func_json_overlaps::getBoolVal(Row& row, FunctionParm& fp, bool& isNull,
                                    CalpontSystemCatalog::ColType& type)
{
  bool lhsNull = false;
  bool rhsNull = false;
  const string_view lhs = fp[0]->data()->getStrVal(row, lhsNull);
  const string_view rhs = fp[1]->data()->getStrVal(row, rhsNull);
  if (lhsNull || rhsNull)
    return false;

  json_engine_t lhsEg, rhsEg;
  initJSEngine(lhsEg, getCharset(fp[0]), lhs);
  initJSEngine(rhsEg, getCharset(fp[1]), rhs);
  if (json_read_value(&lhsEg) || json_read_value(&rhsEg))
    return false;

  const bool overlap = checkOverlaps(&lhsEg, &rhsEg, false);
  const bool parseFailed = (lhsEg.s.error || rhsEg.s.error);
  return !parseFailed && overlap;
}
} // namespace funcexp

View File

@ -0,0 +1,48 @@
#include "functor_json.h"
#include "functioncolumn.h"
#include "constantcolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
namespace funcexp
{
// Extracts the text of the current value when it is an object or array.
// Returns false after appending the container's text to ret. Returns true when nothing was
// extracted: either the value is a scalar (it is stepped over) or skipping the container
// failed; *error is set to 1 in the failure cases.
bool JSONEgWrapper::checkAndGetComplexVal(string& ret, int* error)
{
  if (!json_value_scalar(this))
  {
    const uchar* containerStart = value;
    if (json_skip_level(this))
    {
      *error = 1;
      return true;
    }
    ret.append((const char*)value, s.c_str - containerStart);
    return false;
  }

  /* Scalars are not wanted here: just move on to the next token. */
  if (json_scan_next(this))
    *error = 1;
  return true;
}
// JSON_QUERY inherits its operation type from the document argument.
CalpontSystemCatalog::ColType Func_json_query::operationType(FunctionParm& fp,
                                                             CalpontSystemCatalog::ColType& resultType)
{
  auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
// JSON_QUERY(json, path): delegates to JSONPathWrapper::extract(), whose return value is
// stored in isNull. Returns the extracted text, or "" when isNull ends up true.
string Func_json_query::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                  execplan::CalpontSystemCatalog::ColType& type)
{
  string extracted;
  isNull = JSONPathWrapper::extract(extracted, row, fp[0], fp[1]);
  if (isNull)
    return "";
  return extracted;
}
} // namespace funcexp

View File

@ -0,0 +1,44 @@
#include <string_view>
using namespace std;
#include "functor_json.h"
#include "functioncolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "mcs_datatype.h"
using namespace datatypes;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// JSON_QUOTE inherits its operation type from its argument.
CalpontSystemCatalog::ColType Func_json_quote::operationType(FunctionParm& fp,
                                                             CalpontSystemCatalog::ColType& resultType)
{
  auto& textArg = fp[0];
  return textArg->data()->resultType();
}
/**
 * JSON_QUOTE(str): wraps the argument in double quotes and escapes it as a JSON string.
 *
 * @param row    Current row, used to evaluate the argument.
 * @param fp     fp[0] is the text to quote.
 * @param isNull Set to true when the argument is NULL, is not of a character type, or
 *               escaping fails.
 * @param type   Unused here.
 * @return The quoted string, or "" together with isNull = true.
 */
std::string Func_json_quote::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                       execplan::CalpontSystemCatalog::ColType& type)
{
  const string_view js = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return "";

  // Only character data can be quoted. Flag the result as NULL explicitly instead of
  // returning an empty string that merely looks like one.
  if (!isCharType(fp[0]->data()->resultType().colDataType))
  {
    isNull = true;
    return "";
  }

  string ret("\"");
  isNull = appendEscapedJS(ret, &my_charset_utf8mb4_bin, js, getCharset(fp[0]));
  if (isNull)
    return "";
  ret.append("\"");
  return ret;
}
} // namespace funcexp

View File

@ -0,0 +1,164 @@
#include "functor_json.h"
#include "functioncolumn.h"
#include "constantcolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "dataconvert.h"
using namespace dataconvert;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// JSON_REMOVE inherits its operation type from the document argument.
CalpontSystemCatalog::ColType Func_json_remove::operationType(FunctionParm& fp,
                                                              CalpontSystemCatalog::ColType& resultType)
{
  auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
// JSON_REMOVE(json, path[, path...]): removes the element addressed by each path, one path
// after the other, each working on the output of the previous removal, and returns the final
// document formatted in LOOSE mode.
// A path whose parent is not of the expected container type, or whose element does not
// exist, leaves the document unchanged for that step.
// isNull is set to true (and "" returned) on NULL input, an unparsable or trivial path, or
// malformed JSON.
string Func_json_remove::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type)
{
const string_view js = fp[0]->data()->getStrVal(row, isNull);
if (isNull)
return "";
json_engine_t jsEg;
int jsErr = 0;
json_string_t keyName;
const CHARSET_INFO* cs = getCharset(fp[0]);
json_string_set_cs(&keyName, cs);
initJSPaths(paths, fp, 1, 1);
string retJS;
// tmpJS holds the document as it stands before the current path is applied.
string tmpJS{js};
// i walks the path arguments, j the matching slot in the member `paths`.
for (size_t i = 1, j = 0; i < fp.size(); i++, j++)
{
const char* rawJS = tmpJS.data();
const size_t jsLen = tmpJS.size();
JSONPath& path = paths[j];
const json_path_step_t* lastStep;
// [remStart, remEnd) is the slice of rawJS that gets cut out.
const char *remStart = nullptr, *remEnd = nullptr;
IntType itemSize = 0;
if (!path.parsed)
{
if (parseJSPath(path, row, fp[i], false))
goto error;
// Drop the final step: the path is located up to the parent container and the last
// step (read back below as last_step + 1) is resolved by hand.
path.p.last_step--;
if (path.p.last_step < path.p.steps)
{
// The path had no step to drop, i.e. it addresses the document root.
path.p.s.error = TRIVIAL_PATH_NOT_ALLOWED;
goto error;
}
}
initJSEngine(jsEg, cs, rawJS);
if (path.p.last_step < path.p.steps)
goto v_found;
// A failed lookup is fatal only if it also reported an error through jsErr.
if (locateJSPath(jsEg, path, &jsErr) && jsErr)
goto error;
if (json_read_value(&jsEg))
goto error;
lastStep = path.p.last_step + 1;
if (lastStep->type & JSON_PATH_ARRAY)
{
// Array step on a non-array parent: nothing to remove for this path.
if (jsEg.value_type != JSON_VALUE_ARRAY)
continue;
while (json_scan_next(&jsEg) == 0 && jsEg.state != JST_ARRAY_END)
{
switch (jsEg.state)
{
case JST_VALUE:
if (itemSize == lastStep->n_item)
{
// For the first element start right at the element; for later ones step back by
// sav_c_len first.
remStart = (const char*)(jsEg.s.c_str - (itemSize ? jsEg.sav_c_len : 0));
goto v_found;
}
itemSize++;
if (json_skip_array_item(&jsEg))
goto error;
break;
default: break;
}
}
if (unlikely(jsEg.s.error))
goto error;
// Index beyond the end of the array: nothing to remove.
continue;
}
else /*JSON_PATH_KEY*/
{
// Key step on a non-object parent: nothing to remove for this path.
if (jsEg.value_type != JSON_VALUE_OBJECT)
continue;
while (json_scan_next(&jsEg) == 0 && jsEg.state != JST_OBJ_END)
{
switch (jsEg.state)
{
case JST_KEY:
if (itemSize == 0)
remStart = (const char*)(jsEg.s.c_str - jsEg.sav_c_len);
json_string_set_str(&keyName, lastStep->key, lastStep->key_end);
if (json_key_matches(&jsEg, &keyName))
goto v_found;
if (json_skip_key(&jsEg))
goto error;
// Not the wanted key: a later match would be cut from the end of this member on.
remStart = (const char*)jsEg.s.c_str;
itemSize++;
break;
default: break;
}
}
if (unlikely(jsEg.s.error))
goto error;
// Key not present: nothing to remove.
continue;
}
v_found:
// Step over the element being removed and onto the token that follows it.
if (json_skip_key(&jsEg) || json_scan_next(&jsEg))
goto error;
remEnd = (jsEg.state == JST_VALUE && itemSize == 0) ? (const char*)jsEg.s.c_str
: (const char*)(jsEg.s.c_str - jsEg.sav_c_len);
// Rebuild the document as: text before the element + optional ',' + text after it.
retJS.clear();
retJS.append(rawJS, remStart - rawJS);
if (jsEg.state == JST_KEY && itemSize > 0)
retJS.append(",");
retJS.append(remEnd, rawJS + jsLen - remEnd);
// The shortened document becomes the input for the next path.
tmpJS.swap(retJS);
retJS.clear();
}
initJSEngine(jsEg, cs, tmpJS);
retJS.clear();
if (doFormat(&jsEg, retJS, Func_json_format::LOOSE))
goto error;
isNull = false;
return retJS;
error:
isNull = true;
return "";
}
} // namespace funcexp

View File

@ -0,0 +1,223 @@
#include <string_view>
using namespace std;
#include "functor_json.h"
#include "functioncolumn.h"
#include "constantcolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace
{
// Appends the textual form of a JSON path to ret as a quoted string, e.g. "$.key[2]".
// The first step (p->steps[0]) is represented by the leading `$`; every following step up to
// and including p->last_step is rendered as `.key` or `[index]`.
// Returns false on success and true if appending threw an exception.
static bool appendJSPath(string& ret, const json_path_t* p)
{
  try
  {
    ret += "\"$";
    const json_path_step_t* step = p->steps + 1;
    while (step <= p->last_step)
    {
      if (step->type & JSON_PATH_KEY)
      {
        ret.append(".", 1);
        ret.append(reinterpret_cast<const char*>(step->key), step->key_end - step->key);
      }
      else /* JSON_PATH_ARRAY */
      {
        ret += "[";
        ret += to_string(step->n_item);
        ret += "]";
      }
      ++step;
    }
    ret += "\"";
    return false;
  }
  catch (...)
  {
    return true;
  }
}
} // namespace
namespace funcexp
{
// Wildcard characters passed to CHARSET_INFO::wildcmp() by cmpJSValWild().
// Presumably '_' matches a single character and '%' any sequence, as in SQL LIKE.
const static int wildOne = '_';
const static int wildMany = '%';
/**
 * Matches the JSON value the engine currently points at against a wildcard pattern.
 *
 * @param jsEg   Engine positioned on the value to test.
 * @param cmpStr Pattern, using wildOne / wildMany and the member `escape` character.
 * @param cs     Character set whose wildcmp() performs the comparison.
 * @return 1 when the value matches the pattern, 0 when it does not or when an escaped string
 *         value cannot be unescaped.
 */
int Func_json_search::cmpJSValWild(json_engine_t* jsEg, const string_view& cmpStr, const CHARSET_INFO* cs)
{
  const char* patBegin = cmpStr.data();
  const char* patEnd = patBegin + cmpStr.size();

  // Values without escape sequences can be compared straight from the source text.
  if (jsEg->value_type != JSON_VALUE_STRING || !jsEg->value_escaped)
    return cs->wildcmp((const char*)jsEg->value, (const char*)(jsEg->value + jsEg->value_len), patBegin,
                       patEnd, escape, wildOne, wildMany)
               ? 0
               : 1;

  // Escaped string: decode it first. The scratch buffer is sized from the value length
  // (rounded up to a 1 KiB multiple) and lives on the heap: the length comes from the JSON
  // input, so a stack allocation of that size could overflow the stack.
  string unescaped(static_cast<size_t>((jsEg->value_len / 1024 + 1) * 1024), '\0');
  uchar* out = reinterpret_cast<uchar*>(&unescaped[0]);
  const int strLen = json_unescape(jsEg->s.cs, jsEg->value, jsEg->value + jsEg->value_len, jsEg->s.cs, out,
                                   out + unescaped.size());
  if (strLen <= 0)
    return 0;

  const char* valBegin = unescaped.data();
  return cs->wildcmp(valBegin, valBegin + strLen, patBegin, patEnd, escape, wildOne, wildMany) ? 0 : 1;
}
// JSON_SEARCH inherits its operation type from the document argument.
CalpontSystemCatalog::ColType Func_json_search::operationType(FunctionParm& fp,
                                                              CalpontSystemCatalog::ColType& resultType)
{
  auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
/**
 * JSON_SEARCH(json, 'one'|'all', search_str[, escape_char[, path...]]): returns the path(s)
 * of the scalar values matching the wildcard pattern.
 *
 * @param row    Current row, used to evaluate the arguments.
 * @param fp     fp[0] document, fp[1] mode ("one"/"all", case-insensitive), fp[2] pattern,
 *               optional fp[3] escape character (must be a constant of at most one
 *               character), optional fp[4..] paths restricting where matches are accepted.
 * @param isNull Set to true when the document, pattern or mode is NULL, the mode or escape
 *               argument is invalid, a path cannot be parsed, or nothing matches; set to
 *               false on success.
 * @param type   Unused here.
 * @return A single quoted path for one match, a JSON array of quoted paths for several,
 *         or "" together with isNull = true.
 */
string Func_json_search::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                   execplan::CalpontSystemCatalog::ColType& type)
{
  string ret;
  // Each argument gets its own NULL flag; sharing one flag would let the second read
  // overwrite the outcome of the first.
  bool isNullJS = false, isNullVal = false;
  const string_view js = fp[0]->data()->getStrVal(row, isNullJS);
  const string_view cmpStr = fp[2]->data()->getStrVal(row, isNullVal);
  if (isNullJS || isNullVal)
  {
    isNull = true;
    return "";
  }

  if (!isModeParsed)
  {
    if (!isModeConst)
      isModeConst = (dynamic_cast<ConstantColumn*>(fp[1]->data()) != nullptr);
    string mode = fp[1]->data()->getStrVal(row, isNull);
    if (isNull)
      return "";
    transform(mode.begin(), mode.end(), mode.begin(), ::tolower);
    if (mode != "one" && mode != "all")
    {
      isNull = true;
      return "";
    }
    isModeOne = (mode == "one");
    // Only a constant mode is remembered; otherwise it is evaluated again on the next call.
    isModeParsed = isModeConst;
  }

  if (fp.size() >= 4)
  {
    if (dynamic_cast<ConstantColumn*>(fp[3]->data()) == nullptr)
    {
      isNull = true;
      return "";
    }
    bool isNullEscape = false;
    const string_view escapeStr = fp[3]->data()->getStrVal(row, isNullEscape);
    if (escapeStr.size() > 1)
    {
      isNull = true;
      return "";
    }
    // NULL or empty escape argument: fall back to backslash (indexing an empty view is
    // out of bounds).
    escape = (isNullEscape || escapeStr.empty()) ? '\\' : escapeStr[0];
  }

  json_engine_t jsEg;
  json_path_t p, savPath;
  const CHARSET_INFO* cs = getCharset(fp[0]);
#ifdef MYSQL_GE_1009
  int arrayCounter[JSON_DEPTH_LIMIT];
  bool hasNegPath = 0;
#endif
  int pathFound = 0;

  initJSPaths(paths, fp, 4, 1);
  for (size_t i = 4; i < fp.size(); i++)
  {
    JSONPath& path = paths[i - 4];
    if (!path.parsed)
    {
      if (parseJSPath(path, row, fp[i]))
        goto error;
#ifdef MYSQL_GE_1009
      hasNegPath |= path.p.types_used & JSON_PATH_NEGATIVE_INDEX;
#endif
    }
  }

  json_get_path_start(&jsEg, cs, (const uchar*)js.data(), (const uchar*)js.data() + js.size(), &p);
  while (json_get_path_next(&jsEg, &p) == 0)
  {
#ifdef MYSQL_GE_1009
    if (hasNegPath && jsEg.value_type == JSON_VALUE_ARRAY &&
        json_skip_array_and_count(&jsEg, arrayCounter + (p.last_step - p.steps)))
      goto error;
#endif
    if (json_value_scalar(&jsEg))
    {
#ifdef MYSQL_GE_1009
      bool isMatch = matchJSPath(paths, &p, jsEg.value_type, arrayCounter);
#else
      bool isMatch = matchJSPath(paths, &p, jsEg.value_type);
#endif
      // Without path arguments every scalar is a candidate.
      if ((fp.size() < 5 || isMatch) && cmpJSValWild(&jsEg, cmpStr, cs) != 0)
      {
        ++pathFound;
        if (pathFound == 1)
        {
          // Hold the first hit back: whether it is printed bare or as the first array
          // element is only known once a second hit shows up (or the scan ends).
          savPath = p;
          savPath.last_step = savPath.steps + (p.last_step - p.steps);
        }
        else
        {
          if (pathFound == 2)
          {
            ret.append("[");
            if (appendJSPath(ret, &savPath))
              goto error;
          }
          ret.append(", ");
          if (appendJSPath(ret, &p))
            goto error;
        }
        if (isModeOne)
          goto end;
      }
    }
  }

end:
  if (pathFound == 0)
    goto error;
  if (pathFound == 1)
  {
    if (appendJSPath(ret, &savPath))
      goto error;
  }
  else
    ret.append("]");
  isNull = false;
  return ret;

error:
  isNull = true;
  return "";
}
} // namespace funcexp

View File

@ -0,0 +1,53 @@
#include "functor_json.h"
#include "functioncolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// JSON_TYPE inherits its operation type from the document argument.
CalpontSystemCatalog::ColType Func_json_type::operationType(FunctionParm& fp,
                                                            CalpontSystemCatalog::ColType& resultType)
{
  auto& jsonArg = fp[0];
  return jsonArg->data()->resultType();
}
// JSON_TYPE(json): names the type of the document's top-level value: OBJECT, ARRAY, STRING,
// DOUBLE (number with a fractional part), INTEGER, BOOLEAN or NULL.
// isNull is set to true (and "" returned) when the argument is NULL or its first value
// cannot be read.
string Func_json_type::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                 execplan::CalpontSystemCatalog::ColType& type)
{
  const string_view jsonText = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return "";

  json_engine_t engine;
  initJSEngine(engine, getCharset(fp[0]), jsonText);
  if (json_read_value(&engine))
  {
    isNull = true;
    return "";
  }

  switch (engine.value_type)
  {
    case JSON_VALUE_OBJECT: return "OBJECT";
    case JSON_VALUE_ARRAY: return "ARRAY";
    case JSON_VALUE_STRING: return "STRING";
    case JSON_VALUE_NUMBER:
      if (engine.num_flags & JSON_NUM_FRAC_PART)
        return "DOUBLE";
      return "INTEGER";
    case JSON_VALUE_TRUE:
    case JSON_VALUE_FALSE: return "BOOLEAN";
    default: return "NULL";
  }
}
} // namespace funcexp

View File

@ -0,0 +1,51 @@
#include "functor_json.h"
#include "functioncolumn.h"
#include "jsonhelpers.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// JSON_UNQUOTE operates on the result type of its first argument; the
// resultType hint is not consulted.
CalpontSystemCatalog::ColType Func_json_unquote::operationType(FunctionParm& fp,
                                                               CalpontSystemCatalog::ColType& resultType)
{
  auto* jsonArg = fp[0]->data();
  return jsonArg->resultType();
}
/**
 * Unquotes a JSON string literal.
 *
 * @param row    current row
 * @param fp     fp[0] is the JSON text to unquote
 * @param isNull set by the argument evaluation; an empty string is returned when it is true
 * @param type   supplies the character set used to scan the input
 * @return the unescaped string value when the input is a valid JSON string
 *         literal; otherwise (parse error, non-string value, or unescape
 *         failure) a copy of the input text, unchanged.
 */
std::string Func_json_unquote::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                         execplan::CalpontSystemCatalog::ColType& type)
{
  const string_view js = fp[0]->data()->getStrVal(row, isNull);
  if (isNull)
    return "";

  json_engine_t jsEg;
  const CHARSET_INFO* cs = type.getCharset();
  initJSEngine(jsEg, cs, js);
  json_read_value(&jsEg);

  // Copy with an explicit length: a string_view is not guaranteed to be
  // NUL-terminated and may legitimately contain embedded NUL bytes.
  if (unlikely(jsEg.s.error) || jsEg.value_type != JSON_VALUE_STRING)
    return std::string(js.data(), js.size());

  // Heap-backed output buffer of the same capacity the code always used
  // (value_len bytes). No terminator byte is written, so a completely filled
  // buffer can no longer overflow, and large values cannot exhaust the stack.
  std::string unescaped(static_cast<size_t>(jsEg.value_len), '\0');
  uchar* out = reinterpret_cast<uchar*>(&unescaped[0]);
  const int strLen = json_unescape(cs, jsEg.value, jsEg.value + jsEg.value_len,
                                   &my_charset_utf8mb3_general_ci, out, out + unescaped.size());
  if (strLen < 0)
    return std::string(js.data(), js.size());

  unescaped.resize(static_cast<size_t>(strLen));
  return unescaped;
}
} // namespace funcexp

View File

@ -0,0 +1,34 @@
#include <string_view>
using namespace std;
#include "functor_json.h"
#include "functioncolumn.h"
#include "rowgroup.h"
using namespace execplan;
using namespace rowgroup;
#include "dataconvert.h"
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
// JSON_VALID operates on the result type of its first argument; the
// resultType hint is not consulted.
CalpontSystemCatalog::ColType Func_json_valid::operationType(FunctionParm& fp,
                                                             CalpontSystemCatalog::ColType& resultType)
{
  auto* jsonArg = fp[0]->data();
  return jsonArg->resultType();
}
/**
 * Evaluates JSON_VALID: passes the first argument's text and character set
 * to json_valid() and returns its verdict. A NULL argument returns false
 * (isNull is left as set by the argument evaluation).
 */
bool Func_json_valid::getBoolVal(Row& row, FunctionParm& fp, bool& isNull,
                                 CalpontSystemCatalog::ColType& type)
{
  const string_view jsonText = fp[0]->data()->getStrVal(row, isNull);
  if (!isNull)
  {
    const bool wellFormed = json_valid(jsonText.data(), jsonText.size(), getCharset(fp[0]));
    return wellFormed;
  }
  return false;
}
} // namespace funcexp

View File

@ -0,0 +1,122 @@
#include "functor_json.h"
#include "functioncolumn.h"
#include "constantcolumn.h"
using namespace execplan;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "jsonhelpers.h"
using namespace funcexp::helpers;
namespace funcexp
{
/**
 * Appends the unescaped text of the current value to `ret` if it is a scalar.
 *
 * @param ret   receives the unescaped scalar (true/false are rendered as "1"/"0")
 * @param error set to 1 when skipping a non-scalar value fails
 * @return false when the scalar was handled (an empty result appends nothing);
 *         true when the current value is not a scalar (it is skipped so the
 *         caller can keep searching) or when unescaping failed.
 */
bool JSONEgWrapper::checkAndGetScalar(string& ret, int* error)
{
  if (!json_value_scalar(this))
  {
    /* We only look for scalar values! */
    if (json_skip_level(this) || json_scan_next(this))
      *error = 1;
    return true;
  }

  CHARSET_INFO* cs;
  const uchar* js;
  uint jsLen;
  if (value_type == JSON_VALUE_TRUE || value_type == JSON_VALUE_FALSE)
  {
    cs = &my_charset_utf8mb4_bin;
    js = (const uchar*)((value_type == JSON_VALUE_TRUE) ? "1" : "0");
    jsLen = 1;
  }
  else
  {
    cs = s.cs;
    js = value;
    jsLen = value_len;
  }

  // Same capacity as before (jsLen + jsLen * mbmaxlen), but heap-backed and
  // without a trailing terminator write: the old alloca buffer was written one
  // byte past its end whenever the output filled it completely.
  const size_t bufLen = static_cast<size_t>(jsLen) * (1 + cs->mbmaxlen);
  std::string buf(bufLen, '\0');
  uchar* out = reinterpret_cast<uchar*>(&buf[0]);
  const int strLen = json_unescape(cs, js, js + jsLen, cs, out, out + bufLen);

  // Append with an explicit length so embedded NUL bytes are not dropped.
  if (strLen > 0)
    ret.append(buf, 0, static_cast<size_t>(strLen));

  // Negative results are json_unescape errors; zero means an empty value.
  return strLen < 0;
}
/*
Returns NULL, not an error if the found value
is not a scalar.
*/
bool JSONPathWrapper::extract(std::string& ret, rowgroup::Row& row, execplan::SPTP& funcParamJS,
execplan::SPTP& funcParamPath)
{
bool isNullJS = false, isNullPath = false;
const string& js = funcParamJS->data()->getStrVal(row, isNullJS);
const string_view jsp = funcParamPath->data()->getStrVal(row, isNullPath);
if (isNullJS || isNullPath)
return true;
int error = 0;
if (!parsed)
{
if (!constant)
{
ConstantColumn* constCol = dynamic_cast<ConstantColumn*>(funcParamPath->data());
constant = (constCol != nullptr);
}
if (isNullPath || json_path_setup(&p, getCharset(funcParamPath), (const uchar*)jsp.data(),
(const uchar*)jsp.data() + jsp.size()))
return true;
parsed = constant;
}
JSONEgWrapper je(js, getCharset(funcParamJS));
currStep = p.steps;
do
{
if (error)
return true;
IntType arrayCounters[JSON_DEPTH_LIMIT];
if (json_find_path(&je, &p, &currStep, arrayCounters))
return true;
if (json_read_value(&je))
return true;
} while (unlikely(checkAndGetValue(&je, ret, &error)));
return false;
}
// JSON_VALUE operates on the result type of its first argument; the
// resultType hint is not consulted.
CalpontSystemCatalog::ColType Func_json_value::operationType(FunctionParm& fp,
                                                             CalpontSystemCatalog::ColType& resultType)
{
  auto* jsonArg = fp[0]->data();
  return jsonArg->resultType();
}
// Evaluates JSON_VALUE(fp[0], fp[1]): delegates to JSONPathWrapper::extract(),
// whose return value becomes isNull. Returns the extracted text, or an empty
// string when the result is NULL.
string Func_json_value::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                  execplan::CalpontSystemCatalog::ColType& type)
{
  string extracted;
  isNull = JSONPathWrapper::extract(extracted, row, fp[0], fp[1]);
  if (isNull)
    return "";
  return extracted;
}
} // namespace funcexp

View File

@ -21,7 +21,7 @@
*
*
****************************************************************************/
#include "functor_json.h"
#include <boost/thread/mutex.hpp>
#include "funcexp.h"
@ -143,6 +143,39 @@ FuncExp::FuncExp()
fFuncMap["isnottrue"] = new Func_IsNotTrue();
fFuncMap["isfalse"] = new Func_IsFalse();
fFuncMap["isnotfalse"] = new Func_IsNotFalse();
fFuncMap["json_array"] = new Func_json_array();
fFuncMap["json_array_append"] = new Func_json_array_append();
fFuncMap["json_array_insert"] = new Func_json_array_insert();
fFuncMap["json_contains"] = new Func_json_contains();
fFuncMap["json_contains_path"] = new Func_json_contains_path();
fFuncMap["json_compact"] = new Func_json_format(Func_json_format::COMPACT);
fFuncMap["json_depth"] = new Func_json_depth();
fFuncMap["json_equals"] = new Func_json_equals();
fFuncMap["json_exists"] = new Func_json_exists();
fFuncMap["json_extract"] = new Func_json_extract();
fFuncMap["json_format"] = new Func_json_format();
fFuncMap["json_insert"] = new Func_json_insert();
fFuncMap["json_keys"] = new Func_json_keys();
fFuncMap["json_length"] = new Func_json_length();
fFuncMap["json_loose"] = new Func_json_format(Func_json_format::LOOSE);
fFuncMap["json_merge"] = new Func_json_merge();
fFuncMap["json_merge_patch"] = new Func_json_merge_patch();
fFuncMap["json_merge_preserve"] = new Func_json_merge();
fFuncMap["json_normalize"] = new Func_json_normalize();
fFuncMap["json_object"] = new Func_json_object();
#ifdef MYSQL_GE_1009
fFuncMap["json_overlaps"] = new Func_json_overlaps();
#endif
fFuncMap["json_query"] = new Func_json_query();
fFuncMap["json_quote"] = new Func_json_quote();
fFuncMap["json_remove"] = new Func_json_remove();
fFuncMap["json_replace"] = new Func_json_insert(Func_json_insert::REPLACE);
fFuncMap["json_search"] = new Func_json_search();
fFuncMap["json_set"] = new Func_json_insert(Func_json_insert::SET);
fFuncMap["json_type"] = new Func_json_type();
fFuncMap["json_unquote"] = new Func_json_unquote();
fFuncMap["json_valid"] = new Func_json_valid();
fFuncMap["json_value"] = new Func_json_value();
fFuncMap["last_day"] = new Func_last_day();
fFuncMap["lcase"] = new Func_lcase(); // dlh
fFuncMap["least"] = new Func_least(); // dlh

View File

@ -0,0 +1,679 @@
#pragma once
#include <string>
#define PREFER_MY_CONFIG_H
#include <mariadb.h>
#include <mysql.h>
#include <my_sys.h>
#include <json_lib.h>
#include "collation.h"
#include "functor_bool.h"
#include "functor_int.h"
#include "functor_str.h"
// Check if mariadb version >= 10.9
#if MYSQL_VERSION_ID >= 100900
#ifndef MYSQL_GE_1009
#define MYSQL_GE_1009
#endif
#endif
namespace funcexp
{
// Wrapper around json_lib's json_path_t that adds per-argument bookkeeping
// flags used by the JSON functors.
struct JSONPath
{
public:
JSONPath() : constant(false), parsed(false), currStep(nullptr)
{
}
// Path state filled in by json_path_setup(); not initialised by the constructor.
json_path_t p;
bool constant; // true when the path argument is a constant column
bool parsed; // true when `p` already holds a parsed path that can be reused
// Step cursor handed to json_find_path() while searching a document.
json_path_step_t* currStep;
};
// json_engine_t with constructors that start a scan over a JSON text, plus
// the value readers used by the path-extraction functors.
class JSONEgWrapper : public json_engine_t
{
public:
// Starts scanning the byte range [str, end) interpreted in charset `cs`.
JSONEgWrapper(CHARSET_INFO* cs, const uchar* str, const uchar* end)
{
json_scan_start(this, cs, str, end);
}
// Convenience overload: scans the whole of `str`.
JSONEgWrapper(const std::string& str, CHARSET_INFO* cs)
: JSONEgWrapper(cs, (const uchar*)str.data(), (const uchar*)str.data() + str.size())
{
}
// Value readers invoked through JSONPathWrapper::checkAndGetValue() overrides.
// `ret` receives the value text; `error` is an out-flag for scan failures.
bool checkAndGetScalar(std::string& ret, int* error);
bool checkAndGetComplexVal(std::string& ret, int* error);
};
// Mix-in base for functors that extract a value addressed by a path
// (used by Func_json_value and Func_json_query). Derived classes choose how a
// found value is read by overriding checkAndGetValue().
class JSONPathWrapper : public JSONPath
{
protected:
virtual ~JSONPathWrapper()
{
}
// Reads the value the engine is positioned on into `ret`; `error` is an
// out-flag. extract() keeps searching while this returns true.
virtual bool checkAndGetValue(JSONEgWrapper* je, std::string& ret, int* error) = 0;
// Evaluates both arguments for `row` and extracts the addressed value into
// `ret`. Returns true when the result is NULL.
bool extract(std::string& ret, rowgroup::Row& row, execplan::SPTP& funcParmJS,
execplan::SPTP& funcParmPath);
};
/** @brief Func_json_valid class
 *
 * Boolean functor constructed under the function name "json_valid".
 */
class Func_json_valid : public Func_Bool
{
public:
Func_json_valid() : Func_Bool("json_valid")
{
}
~Func_json_valid()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
bool getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_depth class
 *
 * Integer functor constructed under the function name "json_depth".
 */
class Func_json_depth : public Func_Int
{
public:
Func_json_depth() : Func_Int("json_depth")
{
}
virtual ~Func_json_depth()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
int64_t getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_length class
 *
 * Integer functor constructed under the function name "json_length".
 */
class Func_json_length : public Func_Int
{
protected:
// Path state kept on the functor (presumably for an optional path argument — confirm in the .cpp).
JSONPath path;
public:
Func_json_length() : Func_Int("json_length")
{
}
virtual ~Func_json_length()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
int64_t getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_equals class
 *
 * Boolean functor constructed under the function name "json_equals".
 */
class Func_json_equals : public Func_Bool
{
public:
Func_json_equals() : Func_Bool("json_equals")
{
}
~Func_json_equals()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
bool getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_normalize class
 *
 * String functor constructed under the function name "json_normalize".
 */
class Func_json_normalize : public Func_Str
{
public:
Func_json_normalize() : Func_Str("json_normalize")
{
}
virtual ~Func_json_normalize()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_type class
 *
 * String functor constructed under the function name "json_type".
 */
class Func_json_type : public Func_Str
{
public:
Func_json_type() : Func_Str("json_type")
{
}
virtual ~Func_json_type()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_object class
 *
 * String functor constructed under the function name "json_object".
 */
class Func_json_object : public Func_Str
{
public:
Func_json_object() : Func_Str("json_object")
{
}
virtual ~Func_json_object()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_array class
 *
 * String functor constructed under the function name "json_array".
 */
class Func_json_array : public Func_Str
{
public:
Func_json_array() : Func_Str("json_array")
{
}
virtual ~Func_json_array()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_keys class
 *
 * String functor constructed under the function name "json_keys".
 */
class Func_json_keys : public Func_Str
{
protected:
// Path state kept on the functor (presumably for an optional path argument — confirm in the .cpp).
JSONPath path;
public:
Func_json_keys() : Func_Str("json_keys")
{
}
virtual ~Func_json_keys()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_exists class
 *
 * Boolean functor constructed under the function name "json_exists".
 */
class Func_json_exists : public Func_Bool
{
protected:
// Path state kept on the functor (presumably the parsed path argument — confirm in the .cpp).
JSONPath path;
public:
Func_json_exists() : Func_Bool("json_exists")
{
}
~Func_json_exists()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
bool getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_quote class
 *
 * String functor constructed under the function name "json_quote".
 */
class Func_json_quote : public Func_Str
{
protected:
// NOTE(review): a JSONPath member on a function that takes no path — confirm it is used.
JSONPath path;
public:
Func_json_quote() : Func_Str("json_quote")
{
}
virtual ~Func_json_quote()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_unquote class
 *
 * String functor constructed under the function name "json_unquote".
 */
class Func_json_unquote : public Func_Str
{
protected:
// NOTE(review): a JSONPath member on a function that takes no path — confirm it is used.
JSONPath path;
public:
Func_json_unquote() : Func_Str("json_unquote")
{
}
virtual ~Func_json_unquote()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_format class
 *
 * One string functor shared by the three formatting functions; the FORMATS
 * value chosen at construction selects the function name and output mode.
 */
class Func_json_format : public Func_Str
{
public:
// Output modes. NONE is a placeholder that must not be passed to the constructor.
enum FORMATS
{
NONE,
COMPACT,
LOOSE,
DETAILED
};
protected:
// Formatting mode selected at construction.
FORMATS fmt;
public:
// Default construction yields the "json_detailed" functor.
Func_json_format() : Func_Str("json_detailed"), fmt(DETAILED)
{
}
// Builds the functor for `format` and sets the matching function name
// ("json_detailed", "json_loose" or "json_compact"). `format` must not be NONE.
Func_json_format(FORMATS format) : fmt(format)
{
assert(format != NONE);
switch (format)
{
case DETAILED: Func_Str::Func::funcName("json_detailed"); break;
case LOOSE: Func_Str::Func::funcName("json_loose"); break;
case COMPACT: Func_Str::Func::funcName("json_compact"); break;
default: break;
}
}
virtual ~Func_json_format()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_merge class
 *
 * String functor constructed under the function name "json_merge_preserve".
 */
class Func_json_merge : public Func_Str
{
public:
Func_json_merge() : Func_Str("json_merge_preserve")
{
}
virtual ~Func_json_merge()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_merge_patch class
 *
 * String functor constructed under the function name "json_merge_patch".
 */
class Func_json_merge_patch : public Func_Str
{
public:
Func_json_merge_patch() : Func_Str("json_merge_patch")
{
}
virtual ~Func_json_merge_patch()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_value class
 *
 * String functor constructed under the function name "json_value". It
 * inherits the path search from JSONPathWrapper and reads found values with
 * JSONEgWrapper::checkAndGetScalar().
 */
class Func_json_value : public Func_Str, public JSONPathWrapper
{
public:
Func_json_value() : Func_Str("json_value")
{
}
virtual ~Func_json_value()
{
}
// Forwards to the scalar reader of the engine wrapper.
bool checkAndGetValue(JSONEgWrapper* je, string& res, int* error) override
{
return je->checkAndGetScalar(res, error);
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(
FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& resultType) override;
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type) override;
};
/** @brief Func_json_query class
 *
 * String functor constructed under the function name "json_query". It
 * inherits the path search from JSONPathWrapper and reads found values with
 * JSONEgWrapper::checkAndGetComplexVal().
 */
class Func_json_query : public Func_Str, public JSONPathWrapper
{
public:
Func_json_query() : Func_Str("json_query")
{
}
virtual ~Func_json_query()
{
}
// Forwards to the complex-value reader of the engine wrapper.
bool checkAndGetValue(JSONEgWrapper* je, string& res, int* error) override
{
return je->checkAndGetComplexVal(res, error);
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(
FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& resultType) override;
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type) override;
};
/** @brief Func_json_contains class
 *
 * Boolean functor constructed under the function name "json_contains".
 */
class Func_json_contains : public Func_Bool
{
protected:
// Path state kept on the functor (presumably for the optional path argument — confirm in the .cpp).
JSONPath path;
bool arg2Const;
bool arg2Parsed; // argument 2 is a constant or has been parsed
// NOTE(review): a std::string_view member does not own its data — confirm the
// buffer it views outlives every use of arg2Val.
std::string_view arg2Val;
public:
Func_json_contains() : Func_Bool("json_contains"), arg2Const(false), arg2Parsed(false), arg2Val("")
{
}
virtual ~Func_json_contains()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
bool getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_array_append class
 *
 * String functor constructed under the function name "json_array_append".
 */
class Func_json_array_append : public Func_Str
{
protected:
// One JSONPath per path argument.
std::vector<JSONPath> paths;
public:
Func_json_array_append() : Func_Str("json_array_append")
{
}
virtual ~Func_json_array_append()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
private:
// NOTE(review): purpose not visible in this header — see its use in the .cpp.
static const int padding = 8;
};
/** @brief Func_json_array_insert class
 *
 * String functor constructed under the function name "json_array_insert".
 */
class Func_json_array_insert : public Func_Str
{
protected:
// One JSONPath per path argument.
std::vector<JSONPath> paths;
public:
Func_json_array_insert() : Func_Str("json_array_insert")
{
}
virtual ~Func_json_array_insert()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_insert class
 *
 * One string functor shared by "json_insert", "json_replace" and "json_set";
 * the MODE chosen at construction selects the function name and behaviour.
 */
class Func_json_insert : public Func_Str
{
public:
// Operating modes. NONE is a placeholder that must not be passed to the constructor.
enum MODE
{
NONE,
INSERT,
REPLACE,
SET
};
protected:
// Mode selected at construction.
MODE mode;
// One JSONPath per path argument.
std::vector<JSONPath> paths;
public:
// Default construction yields the "json_insert" functor.
Func_json_insert() : Func_Str("json_insert"), mode(INSERT)
{
}
// Builds the functor for mode `m` and sets the matching function name.
// `m` must not be NONE.
Func_json_insert(MODE m) : mode(m)
{
assert(m != NONE);
switch (m)
{
case INSERT: Func_Str::Func::funcName("json_insert"); break;
case REPLACE: Func_Str::Func::funcName("json_replace"); break;
case SET: Func_Str::Func::funcName("json_set"); break;
default: break;
}
}
virtual ~Func_json_insert()
{
}
// Returns the mode this functor was constructed with.
MODE getMode() const
{
return mode;
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_remove class
 *
 * String functor constructed under the function name "json_remove".
 */
class Func_json_remove : public Func_Str
{
protected:
// One JSONPath per path argument.
std::vector<JSONPath> paths;
public:
Func_json_remove() : Func_Str("json_remove")
{
}
virtual ~Func_json_remove()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_contains_path class
 *
 * Boolean functor constructed under the function name "json_contains_path".
 */
class Func_json_contains_path : public Func_Bool
{
protected:
// One JSONPath per path argument.
std::vector<JSONPath> paths;
// Per-path "found" flags (presumably parallel to `paths` — confirm in the .cpp).
std::vector<bool> hasFound;
// Mode-argument state: the decoded mode, whether the mode argument is a
// constant, and whether it has already been decoded. All start as false.
bool isModeOne;
bool isModeConst;
bool isModeParsed;
public:
Func_json_contains_path()
: Func_Bool("json_contains_path"), isModeOne(false), isModeConst(false), isModeParsed(false)
{
}
virtual ~Func_json_contains_path()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
bool getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_overlaps class
 *
 * Boolean functor constructed under the function name "json_overlaps".
 */
class Func_json_overlaps : public Func_Bool
{
protected:
// NOTE(review): JSONPath member whose use is not visible in this header — confirm in the .cpp.
JSONPath path;
public:
Func_json_overlaps() : Func_Bool("json_overlaps")
{
}
virtual ~Func_json_overlaps()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
bool getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
};
/** @brief Func_json_search class
 *
 * String functor constructed under the function name "json_search".
 */
class Func_json_search : public Func_Str
{
protected:
// One JSONPath per path argument.
std::vector<JSONPath> paths;
// Mode-argument state: whether it has been decoded, whether it is a
// constant, and the decoded mode. All start as false.
bool isModeParsed;
bool isModeConst;
bool isModeOne;
// Escape character for the wildcard comparison; defaults to a backslash.
int escape;
public:
Func_json_search()
: Func_Str("json_search"), isModeParsed(false), isModeConst(false), isModeOne(false), escape('\\')
{
}
virtual ~Func_json_search()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
private:
// Compares the engine's current value against `cmpStr` with wildcard
// matching; getStrVal() treats a non-zero result as a match.
int cmpJSValWild(json_engine_t* jsEg, const string_view& cmpStr, const CHARSET_INFO* cs);
};
/** @brief Func_json_extract class
 *
 * String functor constructed under the function name "json_extract". Besides
 * the string getter it also provides integer, double and decimal getters.
 */
class Func_json_extract : public Func_Str
{
protected:
// One JSONPath per path argument.
std::vector<JSONPath> paths;
public:
Func_json_extract() : Func_Str("json_extract")
{
}
virtual ~Func_json_extract()
{
}
// Type the function operates on, derived from the parameters in `fp`.
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
execplan::CalpontSystemCatalog::ColType& resultType);
// Typed getters: each evaluates the function for `row`; `isNull` reports a NULL result.
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
int64_t getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
double getDoubleVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
execplan::IDB_Decimal getDecimalVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& type);
private:
// Shared extraction routine behind the typed getters; outputs the value type
// through `type` and the extracted JSON text through `retJS`.
int doExtract(rowgroup::Row& row, FunctionParm& fp, json_value_types* type, std::string& retJS,
bool compareWhole);
};
} // namespace funcexp

View File

@ -0,0 +1,374 @@
#include "jsonhelpers.h"
using namespace std;
namespace funcexp
{
namespace helpers
{
// Parses `str` into `path`. With wildcards allowed the json_path_setup()
// result is returned as-is. Otherwise returns 0 only for a path that parsed
// cleanly and uses no wildcard step (nor, on MariaDB >= 10.9, an array range);
// a wildcard path gets s.error = NO_WILDCARD_ALLOWED and 1 is returned.
int setupJSPath(json_path_t* path, CHARSET_INFO* cs, const string_view& str, bool wildcards = true)
{
  const uchar* pathBegin = (const uchar*)str.data();
  const int parseErr = json_path_setup(path, cs, pathBegin, pathBegin + str.size());

  if (wildcards)
    return parseErr;

  if (parseErr)
    return 1;

#ifdef MYSQL_GE_1009
  const bool usesWildcard =
      (path->types_used & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD | JSON_PATH_ARRAY_RANGE)) != 0;
#else
  const bool usesWildcard = (path->types_used & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD)) != 0;
#endif

  if (usesWildcard)
  {
    path->s.error = NO_WILDCARD_ALLOWED;
    return 1;
  }
  return 0;
}
/**
 * Appends the JSON-escaped form of `js` to `ret`.
 *
 * @param ret    output string; left untouched on failure
 * @param retCS  character set passed to json_escape() as its first argument
 * @param js     text to escape
 * @param jsCS   character set passed to json_escape() as its fourth argument
 *               and used to size the scratch buffer
 * @return true on error, false on success. Empty input is a success that
 *         appends nothing (it used to be reported as an error because
 *         json_escape() returns 0 for it).
 */
bool appendEscapedJS(string& ret, const CHARSET_INFO* retCS, const string_view& js, const CHARSET_INFO* jsCS)
{
  if (js.empty())
    return false;

  // Worst case: one input character becomes '\uXXXX\uXXXX' (12 characters).
  // The scratch buffer lives on the heap so large documents cannot overflow
  // the stack, and no terminator is written past its end.
  const size_t maxLen = js.size() * 12 * jsCS->mbmaxlen / jsCS->mbminlen;
  std::string buf(maxLen, '\0');

  const uchar* src = reinterpret_cast<const uchar*>(js.data());
  uchar* dst = reinterpret_cast<uchar*>(&buf[0]);
  const int written = json_escape(retCS, src, src + js.size(), jsCS, dst, dst + maxLen);
  if (written < 0)
    return true;

  ret.append(buf, 0, static_cast<size_t>(written));
  return false;
}
// Appends `"<escaped key>": ` to `ret`, taking the key from `parm` evaluated
// on `row`. A NULL key is written as an empty key (`"": `). Returns true only
// when escaping the key fails.
bool appendJSKeyName(string& ret, const CHARSET_INFO* retCS, rowgroup::Row& row, execplan::SPTP& parm)
{
  bool keyIsNull = false;
  const string_view key = parm->data()->getStrVal(row, keyIsNull);

  if (!keyIsNull)
  {
    ret.append("\"");
    const bool failed = appendEscapedJS(ret, retCS, key, parm->data()->resultType().getCharset());
    if (failed)
      return true;
    ret.append("\": ");
    return false;
  }

  ret.append("\"\": ");
  return false;
}
// Appends the value of `parm` (evaluated on `row`) to `ret` as a JSON value:
//  - NULL becomes the literal null;
//  - a BIGINT whose text is "true" or "false" is copied verbatim;
//  - character types are escaped and wrapped in double quotes;
//  - everything else is escaped and appended unquoted.
// Returns true only when escaping fails.
bool appendJSValue(string& ret, const CHARSET_INFO* retCS, rowgroup::Row& row, execplan::SPTP& parm)
{
  bool valIsNull = false;
  const string_view val = parm->data()->getStrVal(row, valIsNull);
  if (valIsNull)
  {
    ret.append("null");
    return false;
  }

  const datatypes::SystemCatalog::ColDataType colType = parm->data()->resultType().colDataType;
  const bool isBoolLiteral =
      colType == datatypes::SystemCatalog::BIGINT && (val == "true" || val == "false");
  if (isBoolLiteral)
  {
    ret.append(val);
    return false;
  }

  const CHARSET_INFO* valCS = parm->data()->resultType().getCharset();
  if (!isCharType(colType))
    return appendEscapedJS(ret, retCS, val, valCS);

  ret.append("\"");
  const bool failed = appendEscapedJS(ret, retCS, val, valCS);
  if (!failed)
    ret.append("\"");
  return failed;
}
// Starts a new line in `js` and indents it: a newline followed by `depth`
// copies of the first `tabSize` bytes of tab_arr. Returns 0 on success and 1
// when an append throws a std::exception.
int appendTab(string& js, const int depth, const int tabSize)
{
  try
  {
    js.append("\n");
    int level = 0;
    while (level < depth)
    {
      js.append(tab_arr, tabSize);
      ++level;
    }
    return 0;
  }
  catch (const std::exception&)
  {
    return 1;
  }
}
// Re-serialises the JSON document scanned by `je` into `niceJS` using the
// requested format mode. `tabSize` is the indent width used in DETAILED mode
// (asserted to be within 0..TAB_SIZE_LIMIT). Returns 0 on success and a
// non-zero value on a scan/append error or when the engine's kill flag is set.
int doFormat(json_engine_t* je, string& niceJS, Func_json_format::FORMATS mode, int tabSize)
{
int depth = 0;
static const char *comma = ", ", *colon = "\": ";
// Number of characters of `comma` / `colon` actually emitted for this mode.
uint commaLen, colonLen;
// Non-zero while no value has been written yet at the current nesting level,
// i.e. no separating comma is needed.
int firstValue = 1;
niceJS.reserve(je->s.str_end - je->s.c_str + 32);
assert(mode != Func_json_format::DETAILED || (tabSize >= 0 && tabSize <= TAB_SIZE_LIMIT));
if (mode == Func_json_format::LOOSE)
{
// LOOSE: `, ` between items and `": ` after keys.
commaLen = 2;
colonLen = 3;
}
else if (mode == Func_json_format::DETAILED)
{
// DETAILED: `,` between items (a newline follows) and `": ` after keys.
commaLen = 1;
colonLen = 3;
}
else
{
// Otherwise (COMPACT): `,` between items and `":` after keys.
commaLen = 1;
colonLen = 2;
}
do
{
switch (je->state)
{
case JST_KEY:
{
// Read the key name and remember where it starts and ends in the input.
const uchar* key_start = je->s.c_str;
const uchar* key_end;
do
{
key_end = je->s.c_str;
} while (json_read_keyname_chr(je) == 0);
if (unlikely(je->s.error))
goto error;
if (!firstValue)
niceJS.append(comma, commaLen);
if (mode == Func_json_format::DETAILED && appendTab(niceJS, depth, tabSize))
goto error;
// The key bytes are copied verbatim from the input.
niceJS.append("\"");
niceJS.append((const char*)key_start, (int)(key_end - key_start));
niceJS.append(colon, colonLen);
}
/* now we have key value to handle, so no 'break'. */
DBUG_ASSERT(je->state == JST_VALUE);
goto handle_value;
case JST_VALUE:
if (!firstValue)
niceJS.append(comma, commaLen);
if (mode == Func_json_format::DETAILED && depth > 0 && appendTab(niceJS, depth, tabSize))
goto error;
handle_value:
if (json_read_value(je))
goto error;
if (json_value_scalar(je))
{
// Scalars are copied verbatim from the input.
niceJS.append((const char*)je->value_begin, (int)(je->value_end - je->value_begin));
firstValue = 0;
}
else
{
// Opening an object or array starts a new nesting level.
if (mode == Func_json_format::DETAILED && depth > 0 && appendTab(niceJS, depth, tabSize))
goto error;
niceJS.append((je->value_type == JSON_VALUE_OBJECT) ? "{" : "[");
firstValue = 1;
depth++;
}
break;
case JST_OBJ_END:
case JST_ARRAY_END:
// Closing bracket: leave the nesting level before indenting.
depth--;
if (mode == Func_json_format::DETAILED && appendTab(niceJS, depth, tabSize))
goto error;
niceJS.append((je->state == JST_OBJ_END) ? "}" : "]");
firstValue = 0;
break;
default: break;
};
} while (json_scan_next(je) == 0);
// The scan loop has ended: report a scan error or a set kill flag.
return je->s.error || *je->killed_ptr;
error:
return 1;
}
// Scans the object the engine is positioned in for a member named `key`.
// Returns true as soon as a matching key is found; returns false when the
// object ends, the scan fails, or a non-matching member cannot be skipped.
// key->c_str is rewound to its initial position after every non-matching
// member so the next comparison starts from the beginning of the key.
bool findKeyInObject(json_engine_t* jsEg, json_string_t* key)
{
  const uchar* const keyBegin = key->c_str;

  for (;;)
  {
    if (json_scan_next(jsEg) != 0)
      return false;
    if (jsEg->state == JST_OBJ_END)
      return false;

    DBUG_ASSERT(jsEg->state == JST_KEY);
    if (json_key_matches(jsEg, key))
      return true;

    if (json_skip_key(jsEg))
      return false;

    key->c_str = keyBegin;
  }
}
// Compares the pattern steps [a, aEnd] (which may contain wildcards) with the
// concrete steps [b, bEnd] (asserted to contain none).
//   vt        - type of the value found at path b
//   arraySize - optional per-step array sizes, used on MariaDB >= 10.9 to
//               resolve negative indexes in `a`
// Return values:
//    0  the paths match;
//    1  every step of `a` matched but `b` has steps left over;
//   -1  a step did not match and `a` has no double wildcard at that step;
//   -2  `b` ran out of steps before `a` did.
int cmpPartJSPath(const json_path_step_t* a, const json_path_step_t* aEnd, const json_path_step_t* b,
const json_path_step_t* bEnd, enum json_value_types vt, const int* arraySize)
{
int ret, ret2;
// Start of `b`, used to index arraySize by step offset.
const json_path_step_t* tmpB = b;
while (a <= aEnd)
{
if (b > bEnd)
{
// `b` is exhausted. Remaining non-wildcard `[0]` steps of `a` still match
// when the value is not an array (a scalar is treated like a one-element array).
while (vt != JSON_VALUE_ARRAY && (a->type & JSON_PATH_ARRAY_WILD) == JSON_PATH_ARRAY && a->n_item == 0)
{
if (++a > aEnd)
return 0;
}
return -2;
}
DBUG_ASSERT((b->type & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD)) == 0);
if (a->type & JSON_PATH_ARRAY)
{
if (b->type & JSON_PATH_ARRAY)
{
#ifdef MYSQL_GE_1009
// Resolve negative indexes (counted from the array end) when sizes are known.
int ret = 0, corrected_n_item_a = 0;
if (arraySize)
corrected_n_item_a = a->n_item < 0 ? arraySize[b - tmpB] + a->n_item : a->n_item;
if (a->type & JSON_PATH_ARRAY_RANGE)
{
int corrected_n_item_end_a = 0;
if (arraySize)
corrected_n_item_end_a = a->n_item_end < 0 ? arraySize[b - tmpB] + a->n_item_end : a->n_item_end;
ret = b->n_item >= corrected_n_item_a && b->n_item <= corrected_n_item_end_a;
}
else
ret = corrected_n_item_a == b->n_item;
if ((a->type & JSON_PATH_WILD) || ret)
goto step_fits;
goto step_failed;
#else
if ((a->type & JSON_PATH_WILD) || a->n_item == b->n_item)
goto step_fits;
goto step_failed;
#endif
}
// `a` is an array step but `b` is not: only a non-wildcard `[0]` fits.
if ((a->type & JSON_PATH_WILD) == 0 && a->n_item == 0)
goto step_fits_autowrap;
goto step_failed;
}
else /* JSON_PATH_KEY */
{
if (!(b->type & JSON_PATH_KEY))
goto step_failed;
// Non-wildcard keys must be byte-for-byte equal.
if (!(a->type & JSON_PATH_WILD) &&
(a->key_end - a->key != b->key_end - b->key || memcmp(a->key, b->key, a->key_end - a->key) != 0))
goto step_failed;
goto step_fits;
}
step_failed:
// A double wildcard may absorb the non-matching step of `b`.
if (!(a->type & JSON_PATH_DOUBLE_WILD))
return -1;
b++;
continue;
step_fits:
b++;
if (!(a->type & JSON_PATH_DOUBLE_WILD))
{
a++;
continue;
}
/* Double wild handling needs recursions. */
ret = cmpPartJSPath(a + 1, aEnd, b, bEnd, vt, arraySize ? arraySize + (b - tmpB) : NULL);
if (ret == 0)
return 0;
ret2 = cmpPartJSPath(a, aEnd, b, bEnd, vt, arraySize ? arraySize + (b - tmpB) : NULL);
return (ret2 >= 0) ? ret2 : ret;
step_fits_autowrap:
if (!(a->type & JSON_PATH_DOUBLE_WILD))
{
a++;
continue;
}
/* Double wild handling needs recursions. */
ret = cmpPartJSPath(a + 1, aEnd, b + 1, bEnd, vt, arraySize ? arraySize + (b - tmpB) : NULL);
if (ret == 0)
return 0;
ret2 = cmpPartJSPath(a, aEnd, b + 1, bEnd, vt, arraySize ? arraySize + (b - tmpB) : NULL);
return (ret2 >= 0) ? ret2 : ret;
}
// `a` is exhausted: 0 when `b` is too, 1 when `b` still has steps.
return b <= bEnd;
}
int cmpJSPath(const json_path_t* a, const json_path_t* b, enum json_value_types vt, const int* arraySize)
{
return cmpPartJSPath(a->steps + 1, a->last_step, b->steps + 1, b->last_step, vt, arraySize);
}
// Evaluates the path argument `parm` on `row` and parses it into `path`.
// Returns 1 when the argument is NULL or does not parse (or uses wildcards
// while `wildcards` is false), 0 otherwise. After a successful parse the path
// is marked as parsed only when the argument is a constant column.
int parseJSPath(JSONPath& path, rowgroup::Row& row, execplan::SPTP& parm, bool wildcards)
{
  // Determine constness once; a path already known to be constant is not re-checked.
  if (!path.constant)
    markConstFlag(path, parm);

  bool pathIsNull = false;
  const string_view pathText = parm->data()->getStrVal(row, pathIsNull);
  if (pathIsNull)
    return 1;

  if (setupJSPath(&path.p, getCharset(parm), pathText, wildcards))
    return 1;

  path.parsed = path.constant;
  return 0;
}
bool matchJSPath(const vector<funcexp::JSONPath>& paths, const json_path_t* p, json_value_types valType,
const int* arrayCounter, bool exact)
{
for (size_t curr = 0; curr < paths.size(); curr++)
{
#ifdef MYSQL_GE_1009
int cmp = cmpJSPath(&paths[curr].p, p, valType, arrayCounter);
#else
int cmp = cmpJSPath(&paths[curr].p, p, valType);
#endif
bool ret = exact ? cmp >= 0 : cmp == 0;
if (ret)
return true;
}
return false;
}
} // namespace helpers
} // namespace funcexp

107
utils/funcexp/jsonhelpers.h Normal file
View File

@ -0,0 +1,107 @@
#pragma once
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
#define PREFER_MY_CONFIG_H
#include <mariadb.h>
#include <mysql.h>
#include <my_sys.h>
#include <json_lib.h>
#include "collation.h"
#include "functor_json.h"
#include "functor_str.h"
#include "collation.h"
#include "rowgroup.h"
#include "treenode.h"
#include "functioncolumn.h"
#include "constantcolumn.h"
namespace funcexp
{
namespace helpers
{
// Error code stored in json_path_t::s.error when a wildcard path is rejected.
static const int NO_WILDCARD_ALLOWED = 1;
/*
Parses `str` into `path`. When `wildcards` is false, a path that contains
'.*', '[*]' or '**' constructions is rejected and the NO_WILDCARD_ALLOWED
error is set on it. Returns 0 on success and non-zero on error.
*/
int setupJSPath(json_path_t* path, CHARSET_INFO* cs, const string_view& str, bool wildcards);
// The append helpers below return true if an error occurs; the calling
// function is expected to handle it.
// Appends the JSON-escaped form of `js` to `ret`.
bool appendEscapedJS(string& ret, const CHARSET_INFO* retCS, const string_view& js, const CHARSET_INFO* jsCS);
// Appends the value of `parm` for `row` as a quoted object key followed by ": ".
bool appendJSKeyName(string& ret, const CHARSET_INFO* retCS, rowgroup::Row& row, execplan::SPTP& parm);
// Appends the value of `parm` for `row` as a JSON value.
bool appendJSValue(string& ret, const CHARSET_INFO* retCS, rowgroup::Row& row, execplan::SPTP& parm);
// Largest indent width (in spaces) accepted by doFormat() in DETAILED mode.
static const int TAB_SIZE_LIMIT = 8;
// Source of indentation characters: appendTab() copies the first `tabSize`
// bytes (tabSize <= TAB_SIZE_LIMIT), so the literal must hold exactly
// TAB_SIZE_LIMIT spaces. A shorter literal would leak NUL padding bytes into
// the formatted JSON.
static const char tab_arr[TAB_SIZE_LIMIT + 1] = "        ";
// Re-serialises the JSON scanned by `je` into `niceJS` using the given format
// mode; `tabSize` is the indent width for DETAILED mode. Returns 0 on success.
int doFormat(json_engine_t* je, string& niceJS, Func_json_format::FORMATS mode, int tabSize = 4);
// Additional path error codes (NOTE(review): their users are not visible in this header).
static const int SHOULD_END_WITH_ARRAY = 2;
static const int TRIVIAL_PATH_NOT_ALLOWED = 3;
// Scans the current object for a member named `key`; returns true when found.
bool findKeyInObject(json_engine_t* jsEg, json_string_t* key);
// Element type of the array-counter buffer passed to json_find_path():
// int when MYSQL_GE_1009 is defined, uint otherwise.
#ifdef MYSQL_GE_1009
using IntType = int;
#else
using IntType = uint;
#endif
/*
  Positions `jsEg` on the value addressed by `path`, restarting the search
  from the first step of the path. Returns 0 when the path was found and 1
  otherwise; when it was not found because of an engine error and `jsErr` is
  supplied, *jsErr is set to 1.
  The counter buffer uses IntType because json_find_path() in json_lib takes
  uint* array_counters before 10.9 and int* array_counters from 10.9 on.
*/
inline static int locateJSPath(json_engine_t& jsEg, JSONPath& path, int* jsErr = nullptr)
{
  IntType arrayCounters[JSON_DEPTH_LIMIT];
  path.currStep = path.p.steps;

  const bool notFound = json_find_path(&jsEg, &path.p, &path.currStep, arrayCounters) != 0;
  if (!notFound)
    return 0;

  if (jsErr != nullptr && jsEg.s.error)
    *jsErr = 1;
  return 1;
}
// Records on `path` whether the function parameter `parm` is a ConstantColumn.
inline static void markConstFlag(JSONPath& path, const execplan::SPTP& parm)
{
  const auto* asConstant = dynamic_cast<execplan::ConstantColumn*>(parm->data());
  path.constant = (asConstant != nullptr);
}
// Compares pattern path `a` with concrete path `b`; `vt` is the type of the
// value found at `b` and `arraySize` optionally supplies per-step array sizes.
// Returns 0 when the paths match.
int cmpJSPath(const json_path_t* a, const json_path_t* b, enum json_value_types vt,
const int* arraySize = nullptr);
// Returns the character set of the result type of function parameter `parm`.
inline const CHARSET_INFO* getCharset(execplan::SPTP& parm)
{
  auto* node = parm->data();
  return node->resultType().getCharset();
}
// Starts a JSON scan over the whole of `js`, interpreted in charset `jsCS`.
inline void initJSEngine(json_engine_t& jsEg, const CHARSET_INFO* jsCS, const string_view& js)
{
  const uchar* begin = (const uchar*)js.data();
  const uchar* end = begin + js.size();
  json_scan_start(&jsEg, jsCS, begin, end);
}
// Evaluates path argument `parm` on `row` and parses it into `path`; wildcard
// paths are rejected when `wildcards` is false. Returns 0 on success, 1 on
// a NULL argument or a parse error.
int parseJSPath(JSONPath& path, rowgroup::Row& row, execplan::SPTP& parm, bool wildcards = true);
// On first use (when `paths` is still empty) creates one default JSONPath for
// every parameter index start, start + step, ... below fp.size(). A non-empty
// `paths` is left untouched.
inline void initJSPaths(vector<JSONPath>& paths, FunctionParm& fp, const int start, const int step)
{
  if (!paths.empty())
    return;

  size_t idx = start;
  while (idx < fp.size())
  {
    paths.emplace_back();
    idx += step;
  }
}
// Returns true when concrete path `p` is accepted by at least one pattern in
// `paths`. With `exact` set any non-negative comparison result is a hit,
// otherwise only a result of 0. `arrayCounter` optionally supplies array sizes.
bool matchJSPath(const vector<funcexp::JSONPath>& paths, const json_path_t* p, json_value_types valType,
const int* arrayCounter = nullptr, bool exact = true);
} // namespace helpers
} // namespace funcexp