diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c index 87ae60e490f..7f322485e7b 100644 --- a/src/backend/utils/adt/jsonpath.c +++ b/src/backend/utils/adt/jsonpath.c @@ -557,7 +557,7 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey, if (v->content.like_regex.flags & JSP_REGEX_ICASE) appendStringInfoChar(buf, 'i'); - if (v->content.like_regex.flags & JSP_REGEX_SLINE) + if (v->content.like_regex.flags & JSP_REGEX_DOTALL) appendStringInfoChar(buf, 's'); if (v->content.like_regex.flags & JSP_REGEX_MLINE) appendStringInfoChar(buf, 'm'); diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c index 21106e1da86..565b00c4266 100644 --- a/src/backend/utils/adt/jsonpath_exec.c +++ b/src/backend/utils/adt/jsonpath_exec.c @@ -1646,34 +1646,10 @@ executeLikeRegex(JsonPathItem *jsp, JsonbValue *str, JsonbValue *rarg, /* Cache regex text and converted flags. */ if (!cxt->regex) { - uint32 flags = jsp->content.like_regex.flags; - cxt->regex = cstring_to_text_with_len(jsp->content.like_regex.pattern, jsp->content.like_regex.patternlen); - - /* Convert regex flags. */ - cxt->cflags = REG_ADVANCED; - - if (flags & JSP_REGEX_ICASE) - cxt->cflags |= REG_ICASE; - if (flags & JSP_REGEX_MLINE) - cxt->cflags |= REG_NEWLINE; - if (flags & JSP_REGEX_SLINE) - cxt->cflags &= ~REG_NEWLINE; - if (flags & JSP_REGEX_WSPACE) - cxt->cflags |= REG_EXPANDED; - - /* - * 'q' flag can work together only with 'i'. When other is specified, - * then 'q' has no effect. - */ - if ((flags & JSP_REGEX_QUOTE) && - !(flags & (JSP_REGEX_MLINE | JSP_REGEX_SLINE | JSP_REGEX_WSPACE))) - { - cxt->cflags &= ~REG_ADVANCED; - cxt->cflags |= REG_QUOTE; - } + cxt->cflags = jspConvertRegexFlags(jsp->content.like_regex.flags); } if (RE_compile_and_execute(cxt->regex, str->val.string.val, diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y index 91b4b2f5985..1725502ede2 100644 --- a/src/backend/utils/adt/jsonpath_gram.y +++ b/src/backend/utils/adt/jsonpath_gram.y @@ -481,42 +481,32 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern, { JsonPathParseItem *v = makeItemType(jpiLikeRegex); int i; - int cflags = REG_ADVANCED; + int cflags; v->value.like_regex.expr = expr; v->value.like_regex.pattern = pattern->val; v->value.like_regex.patternlen = pattern->len; - v->value.like_regex.flags = 0; + /* Parse the flags string, convert to bitmask. Duplicate flags are OK. */ + v->value.like_regex.flags = 0; for (i = 0; flags && i < flags->len; i++) { switch (flags->val[i]) { case 'i': v->value.like_regex.flags |= JSP_REGEX_ICASE; - cflags |= REG_ICASE; break; case 's': - v->value.like_regex.flags &= ~JSP_REGEX_MLINE; - v->value.like_regex.flags |= JSP_REGEX_SLINE; - cflags |= REG_NEWLINE; + v->value.like_regex.flags |= JSP_REGEX_DOTALL; break; case 'm': - v->value.like_regex.flags &= ~JSP_REGEX_SLINE; v->value.like_regex.flags |= JSP_REGEX_MLINE; - cflags &= ~REG_NEWLINE; break; case 'x': v->value.like_regex.flags |= JSP_REGEX_WSPACE; - cflags |= REG_EXPANDED; break; case 'q': v->value.like_regex.flags |= JSP_REGEX_QUOTE; - if (!(v->value.like_regex.flags & (JSP_REGEX_MLINE | JSP_REGEX_SLINE | JSP_REGEX_WSPACE))) - { - cflags &= ~REG_ADVANCED; - cflags |= REG_QUOTE; - } break; default: ereport(ERROR, @@ -528,6 +518,9 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern, } } + /* Convert flags to what RE_compile_and_cache needs */ + cflags = jspConvertRegexFlags(v->value.like_regex.flags); + /* check regex validity */ (void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val, pattern->len), @@ -536,6 +529,49 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern, return v; } +/* + * Convert from XQuery regex flags to those recognized by our regex library. + */ +int +jspConvertRegexFlags(uint32 xflags) +{ + /* By default, XQuery is very nearly the same as Spencer's AREs */ + int cflags = REG_ADVANCED; + + /* Ignore-case means the same thing, too, modulo locale issues */ + if (xflags & JSP_REGEX_ICASE) + cflags |= REG_ICASE; + + /* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored */ + if (xflags & JSP_REGEX_QUOTE) + { + cflags &= ~REG_ADVANCED; + cflags |= REG_QUOTE; + } + else + { + /* Note that dotall mode is the default in POSIX */ + if (!(xflags & JSP_REGEX_DOTALL)) + cflags |= REG_NLSTOP; + if (xflags & JSP_REGEX_MLINE) + cflags |= REG_NLANCH; + + /* + * XQuery's 'x' mode is related to Spencer's expanded mode, but it's + * not really enough alike to justify treating JSP_REGEX_WSPACE as + * REG_EXPANDED. For now we treat 'x' as unimplemented; perhaps in + * future we'll modify the regex library to have an option for + * XQuery-style ignore-whitespace mode. + */ + if (xflags & JSP_REGEX_WSPACE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented"))); + } + + return cflags; +} + /* * jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is * unavoidable because jsonpath_gram does not create a .h file to export its diff --git a/src/include/utils/jsonpath.h b/src/include/utils/jsonpath.h index 40ad5fda928..8458346bd4c 100644 --- a/src/include/utils/jsonpath.h +++ b/src/include/utils/jsonpath.h @@ -88,9 +88,9 @@ typedef enum JsonPathItemType /* XQuery regex mode flags for LIKE_REGEX predicate */ #define JSP_REGEX_ICASE 0x01 /* i flag, case insensitive */ -#define JSP_REGEX_SLINE 0x02 /* s flag, single-line mode */ -#define JSP_REGEX_MLINE 0x04 /* m flag, multi-line mode */ -#define JSP_REGEX_WSPACE 0x08 /* x flag, expanded syntax */ +#define JSP_REGEX_DOTALL 0x02 /* s flag, dot matches newline */ +#define JSP_REGEX_MLINE 0x04 /* m flag, ^/$ match at newlines */ +#define JSP_REGEX_WSPACE 0x08 /* x flag, ignore whitespace in pattern */ #define JSP_REGEX_QUOTE 0x10 /* q flag, no special characters */ /* @@ -245,4 +245,6 @@ typedef struct JsonPathParseResult extern JsonPathParseResult *parsejsonpath(const char *str, int len); +extern int jspConvertRegexFlags(uint32 xflags); + #endif diff --git a/src/test/regress/expected/jsonb_jsonpath.out b/src/test/regress/expected/jsonb_jsonpath.out index 0202667a1f7..d9618f2d887 100644 --- a/src/test/regress/expected/jsonb_jsonpath.out +++ b/src/test/regress/expected/jsonb_jsonpath.out @@ -1592,14 +1592,14 @@ select jsonb_path_query('[null, 1, "abd", "abdabc"]', 'lax $[*] ? ((@ starts wit 1 (2 rows) -select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c")'); +select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c")'); jsonb_path_query ------------------ "abc" "abdacb" (2 rows) -select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^a b.* c " flag "ix")'); +select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "i")'); jsonb_path_query ------------------ "abc" @@ -1607,7 +1607,7 @@ select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", " "abdacb" (3 rows) -select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "m")'); +select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "m")'); jsonb_path_query ------------------ "abc" @@ -1615,12 +1615,13 @@ select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", " "adc\nabc" (3 rows) -select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "s")'); +select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "s")'); jsonb_path_query ------------------ "abc" "abdacb" -(2 rows) + "ab\nadc" +(3 rows) select jsonb_path_query('[null, 1, "a\b", "a\\b", "^a\\b$"]', 'lax $[*] ? (@ like_regex "a\\b" flag "q")'); jsonb_path_query diff --git a/src/test/regress/expected/jsonpath.out b/src/test/regress/expected/jsonpath.out index ecdd453942b..ea42ae367a3 100644 --- a/src/test/regress/expected/jsonpath.out +++ b/src/test/regress/expected/jsonpath.out @@ -442,17 +442,15 @@ select '$ ? (@ like_regex "pattern" flag "is")'::jsonpath; (1 row) select '$ ? (@ like_regex "pattern" flag "isim")'::jsonpath; - jsonpath --------------------------------------- - $?(@ like_regex "pattern" flag "im") + jsonpath +--------------------------------------- + $?(@ like_regex "pattern" flag "ism") (1 row) select '$ ? (@ like_regex "pattern" flag "xsms")'::jsonpath; - jsonpath --------------------------------------- - $?(@ like_regex "pattern" flag "sx") -(1 row) - +ERROR: XQuery "x" flag (expanded regular expressions) is not implemented +LINE 1: select '$ ? (@ like_regex "pattern" flag "xsms")'::jsonpath; + ^ select '$ ? (@ like_regex "pattern" flag "q")'::jsonpath; jsonpath ------------------------------------- @@ -466,9 +464,9 @@ select '$ ? (@ like_regex "pattern" flag "iq")'::jsonpath; (1 row) select '$ ? (@ like_regex "pattern" flag "smixq")'::jsonpath; - jsonpath ----------------------------------------- - $?(@ like_regex "pattern" flag "imxq") + jsonpath +----------------------------------------- + $?(@ like_regex "pattern" flag "ismxq") (1 row) select '$ ? (@ like_regex "pattern" flag "a")'::jsonpath; diff --git a/src/test/regress/sql/jsonb_jsonpath.sql b/src/test/regress/sql/jsonb_jsonpath.sql index e7629fb7f9d..ae8549d5536 100644 --- a/src/test/regress/sql/jsonb_jsonpath.sql +++ b/src/test/regress/sql/jsonb_jsonpath.sql @@ -335,10 +335,10 @@ select jsonb_path_query('[[null, 1, "abc", "abcabc"]]', 'lax $ ? (@[*] starts wi select jsonb_path_query('[[null, 1, "abd", "abdabc"]]', 'lax $ ? ((@[*] starts with "abc") is unknown)'); select jsonb_path_query('[null, 1, "abd", "abdabc"]', 'lax $[*] ? ((@ starts with "abc") is unknown)'); -select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c")'); -select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^a b.* c " flag "ix")'); -select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "m")'); -select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "s")'); +select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c")'); +select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "i")'); +select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "m")'); +select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "s")'); select jsonb_path_query('[null, 1, "a\b", "a\\b", "^a\\b$"]', 'lax $[*] ? (@ like_regex "a\\b" flag "q")'); select jsonb_path_query('[null, 1, "a\b", "a\\b", "^a\\b$"]', 'lax $[*] ? (@ like_regex "a\\b" flag "")'); select jsonb_path_query('[null, 1, "a\b", "a\\b", "^a\\b$"]', 'lax $[*] ? (@ like_regex "^a\\b$" flag "q")');