diff --git a/src/backend/regex/Makefile b/src/backend/regex/Makefile index 21e7fa5329b..74a4c0c89d8 100644 --- a/src/backend/regex/Makefile +++ b/src/backend/regex/Makefile @@ -12,7 +12,7 @@ subdir = src/backend/regex top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -OBJS = regcomp.o regerror.o regexec.o regfree.o +OBJS = regcomp.o regerror.o regexec.o regfree.o regprefix.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/regex/README b/src/backend/regex/README index 89ba6a62ea2..c5d21e8c99d 100644 --- a/src/backend/regex/README +++ b/src/backend/regex/README @@ -7,12 +7,13 @@ So this file is an attempt to reverse-engineer some docs. General source-file layout -------------------------- -There are four separately-compilable source files, each exposing exactly +There are five separately-compilable source files, each exposing exactly one exported function: regcomp.c: pg_regcomp regexec.c: pg_regexec regerror.c: pg_regerror regfree.c: pg_regfree + regprefix.c: pg_regprefix (The pg_ prefixes were added by the Postgres project to distinguish this library version from any similar one that might be present on a particular system. They'd need to be removed or replaced in any standalone version @@ -44,6 +45,7 @@ regexec.c Top-level regex execution code rege_dfa.c DFA creation and execution regerror.c pg_regerror: generate text for a regex error code regfree.c pg_regfree: API to free a no-longer-needed regex_t +regprefix.c Code for extracting a common prefix from a regex_t The locale-specific code is concerned primarily with case-folding and with expanding locale-specific character classes, such as [[:alnum:]]. It diff --git a/src/backend/regex/regc_color.c b/src/backend/regex/regc_color.c index 2aeb861d976..1c60566fbf5 100644 --- a/src/backend/regex/regc_color.c +++ b/src/backend/regex/regc_color.c @@ -66,8 +66,9 @@ initcm(struct vars * v, cd = cm->cd; /* cm->cd[WHITE] */ cd->sub = NOSUB; cd->arcs = NULL; - cd->flags = 0; + cd->firstchr = CHR_MIN; cd->nchrs = CHR_MAX - CHR_MIN + 1; + cd->flags = 0; /* upper levels of tree */ for (t = &cm->tree[0], j = NBYTS - 1; j > 0; t = nextt, j--) @@ -272,6 +273,7 @@ newcolor(struct colormap * cm) cd->nchrs = 0; cd->sub = NOSUB; cd->arcs = NULL; + cd->firstchr = CHR_MIN; /* in case never set otherwise */ cd->flags = 0; cd->block = NULL; @@ -371,6 +373,8 @@ subcolor(struct colormap * cm, chr c) if (co == sco) /* already in an open subcolor */ return co; /* rest is redundant */ cm->cd[co].nchrs--; + if (cm->cd[sco].nchrs == 0) + cm->cd[sco].firstchr = c; cm->cd[sco].nchrs++; setcolor(cm, c, sco); return sco; @@ -438,6 +442,11 @@ subrange(struct vars * v, /* * subblock - allocate new subcolors for one tree block of chrs, fill in arcs + * + * Note: subcolors that are created during execution of this function + * will not be given a useful value of firstchr; it'll be left as CHR_MIN. + * For the current usage of firstchr in pg_regprefix, this does not matter + * because such subcolors won't occur in the common prefix of a regex. */ static void subblock(struct vars * v, diff --git a/src/backend/regex/regprefix.c b/src/backend/regex/regprefix.c new file mode 100644 index 00000000000..6f91288e9df --- /dev/null +++ b/src/backend/regex/regprefix.c @@ -0,0 +1,259 @@ +/*------------------------------------------------------------------------- + * + * regprefix.c + * Extract a common prefix, if any, from a compiled regex. + * + * + * Portions Copyright (c) 2012, PostgreSQL Global Development Group + * Portions Copyright (c) 1998, 1999 Henry Spencer + * + * IDENTIFICATION + * src/backend/regex/regprefix.c + * + *------------------------------------------------------------------------- + */ + +#include "regex/regguts.h" + + +/* + * forward declarations + */ +static int findprefix(struct cnfa * cnfa, struct colormap * cm, + chr *string, size_t *slength); + + +/* + * pg_regprefix - get common prefix for regular expression + * + * Returns one of: + * REG_NOMATCH: there is no common prefix of strings matching the regex + * REG_PREFIX: there is a common prefix of strings matching the regex + * REG_EXACT: all strings satisfying the regex must match the same string + * or a REG_XXX error code + * + * In the non-failure cases, *string is set to a malloc'd string containing + * the common prefix or exact value, of length *slength (measured in chrs + * not bytes!). + * + * This function does not analyze all complex cases (such as lookahead + * constraints) exactly. Therefore it is possible that some strings matching + * the reported prefix or exact-match string do not satisfy the regex. But + * it should never be the case that a string satisfying the regex does not + * match the reported prefix or exact-match string. + */ +int +pg_regprefix(regex_t *re, + chr **string, + size_t *slength) +{ + struct guts *g; + struct cnfa *cnfa; + int st; + + /* sanity checks */ + if (string == NULL || slength == NULL) + return REG_INVARG; + *string = NULL; /* initialize for failure cases */ + *slength = 0; + if (re == NULL || re->re_magic != REMAGIC) + return REG_INVARG; + if (re->re_csize != sizeof(chr)) + return REG_MIXED; + + /* Initialize locale-dependent support */ + pg_set_regex_collation(re->re_collation); + + /* setup */ + g = (struct guts *) re->re_guts; + if (g->info & REG_UIMPOSSIBLE) + return REG_NOMATCH; + + /* + * This implementation considers only the search NFA for the topmost regex + * tree node. Therefore, constraints such as backrefs are not fully + * applied, which is allowed per the function's API spec. + */ + assert(g->tree != NULL); + cnfa = &g->tree->cnfa; + + /* + * Since a correct NFA should never contain any exit-free loops, it should + * not be possible for our traversal to return to a previously visited + * NFA state. Hence we need at most nstates chrs in the output string. + */ + *string = (chr *) MALLOC(cnfa->nstates * sizeof(chr)); + if (*string == NULL) + return REG_ESPACE; + + /* do it */ + st = findprefix(cnfa, &g->cmap, *string, slength); + + assert(*slength <= cnfa->nstates); + + /* clean up */ + if (st != REG_PREFIX && st != REG_EXACT) + { + FREE(*string); + *string = NULL; + *slength = 0; + } + + return st; +} + +/* + * findprefix - extract common prefix from cNFA + * + * Results are returned into the preallocated chr array string[], with + * *slength (which must be preset to zero) incremented for each chr. + */ +static int /* regprefix return code */ +findprefix(struct cnfa * cnfa, + struct colormap * cm, + chr *string, + size_t *slength) +{ + int st; + int nextst; + color thiscolor; + chr c; + struct carc *ca; + + /* + * The "pre" state must have only BOS/BOL outarcs, else pattern isn't + * anchored left. If we have both BOS and BOL, they must go to the + * same next state. + */ + st = cnfa->pre; + nextst = -1; + for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++) + { + if (ca->co == cnfa->bos[0] || ca->co == cnfa->bos[1]) + { + if (nextst == -1) + nextst = ca->to; + else if (nextst != ca->to) + return REG_NOMATCH; + } + else + return REG_NOMATCH; + } + if (nextst == -1) + return REG_NOMATCH; + + /* + * Scan through successive states, stopping as soon as we find one with + * more than one acceptable transition character (either multiple colors + * on out-arcs, or a color with more than one member chr). + * + * We could find a state with multiple out-arcs that are all labeled with + * the same singleton color; this comes from patterns like "^ab(cde|cxy)". + * In that case we add the chr "c" to the output string but then exit the + * loop with nextst == -1. This leaves a little bit on the table: if the + * pattern is like "^ab(cde|cdy)", we won't notice that "d" could be added + * to the prefix. But chasing multiple parallel state chains doesn't seem + * worth the trouble. + */ + do + { + st = nextst; + nextst = -1; + thiscolor = COLORLESS; + for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++) + { + /* We ignore lookahead constraints */ + if (ca->co >= cnfa->ncolors) + continue; + /* We can also ignore BOS/BOL arcs */ + if (ca->co == cnfa->bos[0] || ca->co == cnfa->bos[1]) + continue; + /* ... but EOS/EOL arcs terminate the search */ + if (ca->co == cnfa->eos[0] || ca->co == cnfa->eos[1]) + { + thiscolor = COLORLESS; + break; + } + if (thiscolor == COLORLESS) + { + /* First plain outarc */ + thiscolor = ca->co; + nextst = ca->to; + } + else if (thiscolor == ca->co) + { + /* Another plain outarc for same color */ + nextst = -1; + } + else + { + /* More than one plain outarc color terminates the search */ + thiscolor = COLORLESS; + break; + } + } + /* Done if we didn't find exactly one color on plain outarcs */ + if (thiscolor == COLORLESS) + break; + /* The color must be a singleton */ + if (cm->cd[thiscolor].nchrs != 1) + break; + + /* + * Identify the color's sole member chr and add it to the prefix + * string. In general the colormap data structure doesn't provide a + * way to find color member chrs, except by trying GETCOLOR() on each + * possible chr value, which won't do at all. However, for the cases + * we care about it should be sufficient to test the "firstchr" value, + * that is the first chr ever added to the color. There are cases + * where this might no longer be a member of the color (so we do need + * to test), but none of them are likely to arise for a character that + * is a member of a common prefix. If we do hit such a corner case, + * we just fall out without adding anything to the prefix string. + */ + c = cm->cd[thiscolor].firstchr; + if (GETCOLOR(cm, c) != thiscolor) + break; + + string[(*slength)++] = c; + + /* Advance to next state, but only if we have a unique next state */ + } while (nextst != -1); + + /* + * If we ended at a state that only has EOS/EOL outarcs leading to the + * "post" state, then we have an exact-match string. Note this is true + * even if the string is of zero length. + */ + nextst = -1; + for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++) + { + if (ca->co == cnfa->eos[0] || ca->co == cnfa->eos[1]) + { + if (nextst == -1) + nextst = ca->to; + else if (nextst != ca->to) + { + nextst = -1; + break; + } + } + else + { + nextst = -1; + break; + } + } + if (nextst == cnfa->post) + return REG_EXACT; + + /* + * Otherwise, if we were unable to identify any prefix characters, say + * NOMATCH --- the pattern is anchored left, but doesn't specify any + * particular first character. + */ + if (*slength > 0) + return REG_PREFIX; + + return REG_NOMATCH; +} diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index 96c77078c8b..074142e7985 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -1170,3 +1170,68 @@ build_regexp_split_result(regexp_matches_ctx *splitctx) Int32GetDatum(startpos + 1)); } } + +/* + * regexp_fixed_prefix - extract fixed prefix, if any, for a regexp + * + * The result is NULL if there is no fixed prefix, else a palloc'd string. + * If it is an exact match, not just a prefix, *exact is returned as TRUE. + */ +char * +regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation, + bool *exact) +{ + char *result; + regex_t *re; + int cflags; + int re_result; + pg_wchar *str; + size_t slen; + size_t maxlen; + char errMsg[100]; + + *exact = false; /* default result */ + + /* Compile RE */ + cflags = REG_ADVANCED; + if (case_insensitive) + cflags |= REG_ICASE; + + re = RE_compile_and_cache(text_re, cflags, collation); + + /* Examine it to see if there's a fixed prefix */ + re_result = pg_regprefix(re, &str, &slen); + + switch (re_result) + { + case REG_NOMATCH: + return NULL; + + case REG_PREFIX: + /* continue with wchar conversion */ + break; + + case REG_EXACT: + *exact = true; + /* continue with wchar conversion */ + break; + + default: + /* re failed??? */ + pg_regerror(re_result, re, errMsg, sizeof(errMsg)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("regular expression failed: %s", errMsg))); + break; + } + + /* Convert pg_wchar result back to database encoding */ + maxlen = pg_database_encoding_max_length() * slen + 1; + result = (char *) palloc(maxlen); + slen = pg_wchar2mb_with_len(str, result, slen); + Assert(slen < maxlen); + + free(str); + + return result; +} diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 7eb64cba4bb..40e1bebac16 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -195,7 +195,8 @@ static Selectivity prefix_selectivity(PlannerInfo *root, static Selectivity like_selectivity(const char *patt, int pattlen, bool case_insensitive); static Selectivity regex_selectivity(const char *patt, int pattlen, - bool case_insensitive); + bool case_insensitive, + int fixed_prefix_len); static Datum string_to_datum(const char *str, Oid datatype); static Const *string_to_const(const char *str, Oid datatype); static Const *string_to_bytea_const(const char *str, size_t str_len); @@ -5255,18 +5256,9 @@ static Pattern_Prefix_Status regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec) { - char *match; - int pos, - match_pos, - prev_pos, - prev_match_pos; - bool have_leading_paren; - char *patt; - char *rest; Oid typeid = patt_const->consttype; - bool is_multibyte = (pg_database_encoding_max_length() > 1); - pg_locale_t locale = 0; - bool locale_is_c = false; + char *prefix; + bool exact; /* * Should be unnecessary, there are no bytea regex operators defined. As @@ -5278,185 +5270,54 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("regular-expression matching not supported on type bytea"))); - if (case_insensitive) - { - /* If case-insensitive, we need locale info */ - if (lc_ctype_is_c(collation)) - locale_is_c = true; - else if (collation != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collation)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for regular expression"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } - locale = pg_newlocale_from_collation(collation); - } - } + /* Use the regexp machinery to extract the prefix, if any */ + prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue), + case_insensitive, collation, + &exact); - /* the right-hand const is type text for all of these */ - patt = TextDatumGetCString(patt_const->constvalue); - - /* - * Check for ARE director prefix. It's worth our trouble to recognize - * this because similar_escape() used to use it, and some other code might - * still use it, to force ARE mode. - */ - pos = 0; - if (strncmp(patt, "***:", 4) == 0) - pos = 4; - - /* Pattern must be anchored left */ - if (patt[pos] != '^') + if (prefix == NULL) { *prefix_const = NULL; if (rest_selec != NULL) + { + char *patt = TextDatumGetCString(patt_const->constvalue); + *rest_selec = regex_selectivity(patt, strlen(patt), - case_insensitive); - - return Pattern_Prefix_None; - } - pos++; - - /* - * If '|' is present in pattern, then there may be multiple alternatives - * for the start of the string. (There are cases where this isn't so, for - * instance if the '|' is inside parens, but detecting that reliably is - * too hard.) - */ - if (strchr(patt + pos, '|') != NULL) - { - *prefix_const = NULL; - - if (rest_selec != NULL) - *rest_selec = regex_selectivity(patt, strlen(patt), - case_insensitive); + case_insensitive, + 0); + pfree(patt); + } return Pattern_Prefix_None; } - /* OK, allocate space for pattern */ - match = palloc(strlen(patt) + 1); - prev_match_pos = match_pos = 0; - - /* - * We special-case the syntax '^(...)$' because psql uses it. But beware: - * sequences beginning "(?" are not what they seem, unless they're "(?:". - * (We must recognize that because of similar_escape().) - */ - have_leading_paren = false; - if (patt[pos] == '(' && - (patt[pos + 1] != '?' || patt[pos + 2] == ':')) - { - have_leading_paren = true; - pos += (patt[pos + 1] != '?' ? 1 : 3); - } - - /* Scan remainder of pattern */ - prev_pos = pos; - while (patt[pos]) - { - int len; - - /* - * Check for characters that indicate multiple possible matches here. - * Also, drop out at ')' or '$' so the termination test works right. - */ - if (patt[pos] == '.' || - patt[pos] == '(' || - patt[pos] == ')' || - patt[pos] == '[' || - patt[pos] == '^' || - patt[pos] == '$') - break; - - /* Stop if case-varying character (it's sort of a wildcard) */ - if (case_insensitive && - pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c)) - break; - - /* - * Check for quantifiers. Except for +, this means the preceding - * character is optional, so we must remove it from the prefix too! - */ - if (patt[pos] == '*' || - patt[pos] == '?' || - patt[pos] == '{') - { - match_pos = prev_match_pos; - pos = prev_pos; - break; - } - if (patt[pos] == '+') - { - pos = prev_pos; - break; - } - - /* - * Normally, backslash quotes the next character. But in AREs, - * backslash followed by alphanumeric is an escape, not a quoted - * character. Must treat it as having multiple possible matches. - * Note: since only ASCII alphanumerics are escapes, we don't have to - * be paranoid about multibyte or collations here. - */ - if (patt[pos] == '\\') - { - if (isalnum((unsigned char) patt[pos + 1])) - break; - pos++; - if (patt[pos] == '\0') - break; - } - /* save position in case we need to back up on next loop cycle */ - prev_match_pos = match_pos; - prev_pos = pos; - /* must use encoding-aware processing here */ - len = pg_mblen(&patt[pos]); - memcpy(&match[match_pos], &patt[pos], len); - match_pos += len; - pos += len; - } - - match[match_pos] = '\0'; - rest = &patt[pos]; - - if (have_leading_paren && patt[pos] == ')') - pos++; - - if (patt[pos] == '$' && patt[pos + 1] == '\0') - { - *prefix_const = string_to_const(match, typeid); - - if (rest_selec != NULL) - *rest_selec = 1.0; - - pfree(patt); - pfree(match); - - return Pattern_Prefix_Exact; /* pattern specifies exact match */ - } - - *prefix_const = string_to_const(match, typeid); + *prefix_const = string_to_const(prefix, typeid); if (rest_selec != NULL) - *rest_selec = regex_selectivity(rest, strlen(rest), - case_insensitive); + { + if (exact) + { + /* Exact match, so there's no additional selectivity */ + *rest_selec = 1.0; + } + else + { + char *patt = TextDatumGetCString(patt_const->constvalue); - pfree(patt); - pfree(match); + *rest_selec = regex_selectivity(patt, strlen(patt), + case_insensitive, + strlen(prefix)); + pfree(patt); + } + } - if (match_pos > 0) + pfree(prefix); + + if (exact) + return Pattern_Prefix_Exact; /* pattern specifies exact match */ + else return Pattern_Prefix_Partial; - - return Pattern_Prefix_None; } Pattern_Prefix_Status @@ -5741,7 +5602,8 @@ regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive) } static Selectivity -regex_selectivity(const char *patt, int pattlen, bool case_insensitive) +regex_selectivity(const char *patt, int pattlen, bool case_insensitive, + int fixed_prefix_len) { Selectivity sel; @@ -5757,9 +5619,14 @@ regex_selectivity(const char *patt, int pattlen, bool case_insensitive) /* no trailing $ */ sel = regex_selectivity_sub(patt, pattlen, case_insensitive); sel *= FULL_WILDCARD_SEL; - if (sel > 1.0) - sel = 1.0; } + + /* If there's a fixed prefix, discount its selectivity */ + if (fixed_prefix_len > 0) + sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len); + + /* Make sure result stays in range */ + CLAMP_PROBABILITY(sel); return sel; } diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h index cec4b837cd1..616c2c6450d 100644 --- a/src/include/regex/regex.h +++ b/src/include/regex/regex.h @@ -156,6 +156,9 @@ typedef struct /* two specials for debugging and testing */ #define REG_ATOI 101 /* convert error-code name to number */ #define REG_ITOA 102 /* convert error-code number to name */ +/* non-error result codes for pg_regprefix */ +#define REG_PREFIX (-1) /* identified a common prefix */ +#define REG_EXACT (-2) /* identified an exact match */ @@ -164,6 +167,7 @@ typedef struct */ extern int pg_regcomp(regex_t *, const pg_wchar *, size_t, int, Oid); extern int pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int); +extern int pg_regprefix(regex_t *, pg_wchar **, size_t *); extern void pg_regfree(regex_t *); extern size_t pg_regerror(int, const regex_t *, char *, size_t); extern void pg_set_regex_collation(Oid collation); diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h index b8788506d41..e1e406f4eaa 100644 --- a/src/include/regex/regguts.h +++ b/src/include/regex/regguts.h @@ -199,19 +199,21 @@ struct colordesc color sub; /* open subcolor, if any; or free-chain ptr */ #define NOSUB COLORLESS /* value of "sub" when no open subcolor */ struct arc *arcs; /* chain of all arcs of this color */ + chr firstchr; /* char first assigned to this color */ int flags; /* bit values defined next */ #define FREECOL 01 /* currently free */ #define PSEUDO 02 /* pseudocolor, no real chars */ -#define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) +#define UNUSEDCOLOR(cd) ((cd)->flags & FREECOL) union tree *block; /* block of solid color, if any */ }; /* * The color map itself * - * Only the "tree" part is used at execution time, and that only via the - * GETCOLOR() macro. Possibly that should be separated from the compile-time - * data. + * Much of the data in the colormap struct is only used at compile time. + * However, the bulk of the space usage is in the "tree" structure, so it's + * not clear that there's much point in converting the rest to a more compact + * form when compilation is finished. */ struct colormap { diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 10634039a44..b43a4f1f6f3 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -568,6 +568,8 @@ extern Datum regexp_split_to_table(PG_FUNCTION_ARGS); extern Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS); extern Datum regexp_split_to_array(PG_FUNCTION_ARGS); extern Datum regexp_split_to_array_no_flags(PG_FUNCTION_ARGS); +extern char *regexp_fixed_prefix(text *text_re, bool case_insensitive, + Oid collation, bool *exact); /* regproc.c */ extern Datum regprocin(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out index dc0c713b408..658538fd419 100644 --- a/src/test/regress/expected/regex.out +++ b/src/test/regress/expected/regex.out @@ -90,3 +90,66 @@ select substring('a' from '((a)+)'); a (1 row) +-- Test conversion of regex patterns to indexable conditions +explain (costs off) select * from pg_proc where proname ~ 'abc'; + QUERY PLAN +----------------------------------- + Seq Scan on pg_proc + Filter: (proname ~ 'abc'::text) +(2 rows) + +explain (costs off) select * from pg_proc where proname ~ '^abc'; + QUERY PLAN +---------------------------------------------------------------------- + Index Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name)) + Filter: (proname ~ '^abc'::text) +(3 rows) + +explain (costs off) select * from pg_proc where proname ~ '^abc$'; + QUERY PLAN +------------------------------------------------------------ + Index Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: (proname = 'abc'::name) + Filter: (proname ~ '^abc$'::text) +(3 rows) + +explain (costs off) select * from pg_proc where proname ~ '^abcd*e'; + QUERY PLAN +---------------------------------------------------------------------- + Index Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name)) + Filter: (proname ~ '^abcd*e'::text) +(3 rows) + +explain (costs off) select * from pg_proc where proname ~ '^abc+d'; + QUERY PLAN +---------------------------------------------------------------------- + Index Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name)) + Filter: (proname ~ '^abc+d'::text) +(3 rows) + +explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)'; + QUERY PLAN +---------------------------------------------------------------------------- + Index Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: ((proname >= 'abcdef'::name) AND (proname < 'abcdeg'::name)) + Filter: (proname ~ '^(abc)(def)'::text) +(3 rows) + +explain (costs off) select * from pg_proc where proname ~ '^(abc)$'; + QUERY PLAN +------------------------------------------------------------ + Index Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: (proname = 'abc'::name) + Filter: (proname ~ '^(abc)$'::text) +(3 rows) + +explain (costs off) select * from pg_proc where proname ~ '^(abc)?d'; + QUERY PLAN +---------------------------------------- + Seq Scan on pg_proc + Filter: (proname ~ '^(abc)?d'::text) +(2 rows) + diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql index 9fdcb2f5bd5..c29ed05d768 100644 --- a/src/test/regress/sql/regex.sql +++ b/src/test/regress/sql/regex.sql @@ -24,3 +24,13 @@ select 'abc abc abd' ~ '^(.+)( \1)+$' as f; select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)'); select substring('a' from '((a))+'); select substring('a' from '((a)+)'); + +-- Test conversion of regex patterns to indexable conditions +explain (costs off) select * from pg_proc where proname ~ 'abc'; +explain (costs off) select * from pg_proc where proname ~ '^abc'; +explain (costs off) select * from pg_proc where proname ~ '^abc$'; +explain (costs off) select * from pg_proc where proname ~ '^abcd*e'; +explain (costs off) select * from pg_proc where proname ~ '^abc+d'; +explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)'; +explain (costs off) select * from pg_proc where proname ~ '^(abc)$'; +explain (costs off) select * from pg_proc where proname ~ '^(abc)?d';