mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Back-patch fix for extraction of fixed prefixes from regular expressions.
Back-patch of commits 628cbb50ba and c6aae3042b. This has been broken since 7.3, so back-patch to all supported branches.
This commit is contained in:
		@@ -12,7 +12,7 @@ subdir = src/backend/regex
 | 
				
			|||||||
top_builddir = ../../..
 | 
					top_builddir = ../../..
 | 
				
			||||||
include $(top_builddir)/src/Makefile.global
 | 
					include $(top_builddir)/src/Makefile.global
 | 
				
			||||||
 | 
					
 | 
				
			||||||
OBJS = regcomp.o regerror.o regexec.o regfree.o
 | 
					OBJS = regcomp.o regerror.o regexec.o regfree.o regprefix.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
include $(top_srcdir)/src/backend/common.mk
 | 
					include $(top_srcdir)/src/backend/common.mk
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -66,8 +66,9 @@ initcm(struct vars * v,
 | 
				
			|||||||
	cd = cm->cd;				/* cm->cd[WHITE] */
 | 
						cd = cm->cd;				/* cm->cd[WHITE] */
 | 
				
			||||||
	cd->sub = NOSUB;
 | 
						cd->sub = NOSUB;
 | 
				
			||||||
	cd->arcs = NULL;
 | 
						cd->arcs = NULL;
 | 
				
			||||||
	cd->flags = 0;
 | 
						cd->firstchr = CHR_MIN;
 | 
				
			||||||
	cd->nchrs = CHR_MAX - CHR_MIN + 1;
 | 
						cd->nchrs = CHR_MAX - CHR_MIN + 1;
 | 
				
			||||||
 | 
						cd->flags = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* upper levels of tree */
 | 
						/* upper levels of tree */
 | 
				
			||||||
	for (t = &cm->tree[0], j = NBYTS - 1; j > 0; t = nextt, j--)
 | 
						for (t = &cm->tree[0], j = NBYTS - 1; j > 0; t = nextt, j--)
 | 
				
			||||||
@@ -272,6 +273,7 @@ newcolor(struct colormap * cm)
 | 
				
			|||||||
	cd->nchrs = 0;
 | 
						cd->nchrs = 0;
 | 
				
			||||||
	cd->sub = NOSUB;
 | 
						cd->sub = NOSUB;
 | 
				
			||||||
	cd->arcs = NULL;
 | 
						cd->arcs = NULL;
 | 
				
			||||||
 | 
						cd->firstchr = CHR_MIN;		/* in case never set otherwise */
 | 
				
			||||||
	cd->flags = 0;
 | 
						cd->flags = 0;
 | 
				
			||||||
	cd->block = NULL;
 | 
						cd->block = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -371,6 +373,8 @@ subcolor(struct colormap * cm, chr c)
 | 
				
			|||||||
	if (co == sco)				/* already in an open subcolor */
 | 
						if (co == sco)				/* already in an open subcolor */
 | 
				
			||||||
		return co;				/* rest is redundant */
 | 
							return co;				/* rest is redundant */
 | 
				
			||||||
	cm->cd[co].nchrs--;
 | 
						cm->cd[co].nchrs--;
 | 
				
			||||||
 | 
						if (cm->cd[sco].nchrs == 0)
 | 
				
			||||||
 | 
							cm->cd[sco].firstchr = c;
 | 
				
			||||||
	cm->cd[sco].nchrs++;
 | 
						cm->cd[sco].nchrs++;
 | 
				
			||||||
	setcolor(cm, c, sco);
 | 
						setcolor(cm, c, sco);
 | 
				
			||||||
	return sco;
 | 
						return sco;
 | 
				
			||||||
@@ -438,6 +442,11 @@ subrange(struct vars * v,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * subblock - allocate new subcolors for one tree block of chrs, fill in arcs
 | 
					 * subblock - allocate new subcolors for one tree block of chrs, fill in arcs
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Note: subcolors that are created during execution of this function
 | 
				
			||||||
 | 
					 * will not be given a useful value of firstchr; it'll be left as CHR_MIN.
 | 
				
			||||||
 | 
					 * For the current usage of firstchr in pg_regprefix, this does not matter
 | 
				
			||||||
 | 
					 * because such subcolors won't occur in the common prefix of a regex.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
subblock(struct vars * v,
 | 
					subblock(struct vars * v,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1330,14 +1330,16 @@ compact(struct nfa * nfa,
 | 
				
			|||||||
	for (s = nfa->states; s != NULL; s = s->next)
 | 
						for (s = nfa->states; s != NULL; s = s->next)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		nstates++;
 | 
							nstates++;
 | 
				
			||||||
		narcs += 1 + s->nouts + 1;
 | 
							narcs += s->nouts + 1;		/* need one extra for endmarker */
 | 
				
			||||||
		/* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
 | 
				
			||||||
	cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
 | 
						cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
 | 
				
			||||||
	cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
 | 
						cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
 | 
				
			||||||
	if (cnfa->states == NULL || cnfa->arcs == NULL)
 | 
						if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
 | 
							if (cnfa->stflags != NULL)
 | 
				
			||||||
 | 
								FREE(cnfa->stflags);
 | 
				
			||||||
		if (cnfa->states != NULL)
 | 
							if (cnfa->states != NULL)
 | 
				
			||||||
			FREE(cnfa->states);
 | 
								FREE(cnfa->states);
 | 
				
			||||||
		if (cnfa->arcs != NULL)
 | 
							if (cnfa->arcs != NULL)
 | 
				
			||||||
@@ -1359,9 +1361,8 @@ compact(struct nfa * nfa,
 | 
				
			|||||||
	for (s = nfa->states; s != NULL; s = s->next)
 | 
						for (s = nfa->states; s != NULL; s = s->next)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		assert((size_t) s->no < nstates);
 | 
							assert((size_t) s->no < nstates);
 | 
				
			||||||
 | 
							cnfa->stflags[s->no] = 0;
 | 
				
			||||||
		cnfa->states[s->no] = ca;
 | 
							cnfa->states[s->no] = ca;
 | 
				
			||||||
		ca->co = 0;				/* clear and skip flags "arc" */
 | 
					 | 
				
			||||||
		ca++;
 | 
					 | 
				
			||||||
		first = ca;
 | 
							first = ca;
 | 
				
			||||||
		for (a = s->outs; a != NULL; a = a->outchain)
 | 
							for (a = s->outs; a != NULL; a = a->outchain)
 | 
				
			||||||
			switch (a->type)
 | 
								switch (a->type)
 | 
				
			||||||
@@ -1392,8 +1393,8 @@ compact(struct nfa * nfa,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	/* mark no-progress states */
 | 
						/* mark no-progress states */
 | 
				
			||||||
	for (a = nfa->pre->outs; a != NULL; a = a->outchain)
 | 
						for (a = nfa->pre->outs; a != NULL; a = a->outchain)
 | 
				
			||||||
		cnfa->states[a->to->no]->co = 1;
 | 
							cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
 | 
				
			||||||
	cnfa->states[nfa->pre->no]->co = 1;
 | 
						cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
@@ -1433,6 +1434,7 @@ freecnfa(struct cnfa * cnfa)
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
	assert(cnfa->nstates != 0); /* not empty already */
 | 
						assert(cnfa->nstates != 0); /* not empty already */
 | 
				
			||||||
	cnfa->nstates = 0;
 | 
						cnfa->nstates = 0;
 | 
				
			||||||
 | 
						FREE(cnfa->stflags);
 | 
				
			||||||
	FREE(cnfa->states);
 | 
						FREE(cnfa->states);
 | 
				
			||||||
	FREE(cnfa->arcs);
 | 
						FREE(cnfa->arcs);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -1617,7 +1619,7 @@ dumpcnfa(struct cnfa * cnfa,
 | 
				
			|||||||
		fprintf(f, ", haslacons");
 | 
							fprintf(f, ", haslacons");
 | 
				
			||||||
	fprintf(f, "\n");
 | 
						fprintf(f, "\n");
 | 
				
			||||||
	for (st = 0; st < cnfa->nstates; st++)
 | 
						for (st = 0; st < cnfa->nstates; st++)
 | 
				
			||||||
		dumpcstate(st, cnfa->states[st], cnfa, f);
 | 
							dumpcstate(st, cnfa, f);
 | 
				
			||||||
	fflush(f);
 | 
						fflush(f);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
@@ -1629,22 +1631,20 @@ dumpcnfa(struct cnfa * cnfa,
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
dumpcstate(int st,
 | 
					dumpcstate(int st,
 | 
				
			||||||
		   struct carc * ca,
 | 
					 | 
				
			||||||
		   struct cnfa * cnfa,
 | 
							   struct cnfa * cnfa,
 | 
				
			||||||
		   FILE *f)
 | 
							   FILE *f)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int			i;
 | 
						struct carc * ca;
 | 
				
			||||||
	int			pos;
 | 
						int			pos;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
 | 
						fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
 | 
				
			||||||
	pos = 1;
 | 
						pos = 1;
 | 
				
			||||||
	for (i = 1; ca[i].co != COLORLESS; i++)
 | 
						for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		if (ca[i].co < cnfa->ncolors)
 | 
							if (ca->co < cnfa->ncolors)
 | 
				
			||||||
			fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
 | 
								fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
 | 
				
			||||||
		else
 | 
							else
 | 
				
			||||||
			fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors,
 | 
								fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
 | 
				
			||||||
					ca[i].to);
 | 
					 | 
				
			||||||
		if (pos == 5)
 | 
							if (pos == 5)
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
			fprintf(f, "\n");
 | 
								fprintf(f, "\n");
 | 
				
			||||||
@@ -1653,7 +1653,7 @@ dumpcstate(int st,
 | 
				
			|||||||
		else
 | 
							else
 | 
				
			||||||
			pos++;
 | 
								pos++;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (i == 1 || pos != 1)
 | 
						if (ca == cnfa->states[st] || pos != 1)
 | 
				
			||||||
		fprintf(f, "\n");
 | 
							fprintf(f, "\n");
 | 
				
			||||||
	fflush(f);
 | 
						fflush(f);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -162,7 +162,7 @@ static void dumparcs(struct state *, FILE *);
 | 
				
			|||||||
static int	dumprarcs(struct arc *, struct state *, FILE *, int);
 | 
					static int	dumprarcs(struct arc *, struct state *, FILE *, int);
 | 
				
			||||||
static void dumparc(struct arc *, struct state *, FILE *);
 | 
					static void dumparc(struct arc *, struct state *, FILE *);
 | 
				
			||||||
static void dumpcnfa(struct cnfa *, FILE *);
 | 
					static void dumpcnfa(struct cnfa *, FILE *);
 | 
				
			||||||
static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
 | 
					static void dumpcstate(int, struct cnfa *, FILE *);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
/* === regc_cvec.c === */
 | 
					/* === regc_cvec.c === */
 | 
				
			||||||
static struct cvec *newcvec(int, int);
 | 
					static struct cvec *newcvec(int, int);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -458,14 +458,14 @@ miss(struct vars * v,			/* used only for debug flags */
 | 
				
			|||||||
	gotstate = 0;
 | 
						gotstate = 0;
 | 
				
			||||||
	for (i = 0; i < d->nstates; i++)
 | 
						for (i = 0; i < d->nstates; i++)
 | 
				
			||||||
		if (ISBSET(css->states, i))
 | 
							if (ISBSET(css->states, i))
 | 
				
			||||||
			for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++)
 | 
								for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
 | 
				
			||||||
				if (ca->co == co)
 | 
									if (ca->co == co)
 | 
				
			||||||
				{
 | 
									{
 | 
				
			||||||
					BSET(d->work, ca->to);
 | 
										BSET(d->work, ca->to);
 | 
				
			||||||
					gotstate = 1;
 | 
										gotstate = 1;
 | 
				
			||||||
					if (ca->to == cnfa->post)
 | 
										if (ca->to == cnfa->post)
 | 
				
			||||||
						ispost = 1;
 | 
											ispost = 1;
 | 
				
			||||||
					if (!cnfa->states[ca->to]->co)
 | 
										if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
 | 
				
			||||||
						noprogress = 0;
 | 
											noprogress = 0;
 | 
				
			||||||
					FDEBUG(("%d -> %d\n", i, ca->to));
 | 
										FDEBUG(("%d -> %d\n", i, ca->to));
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
@@ -476,10 +476,9 @@ miss(struct vars * v,			/* used only for debug flags */
 | 
				
			|||||||
		dolacons = 0;
 | 
							dolacons = 0;
 | 
				
			||||||
		for (i = 0; i < d->nstates; i++)
 | 
							for (i = 0; i < d->nstates; i++)
 | 
				
			||||||
			if (ISBSET(d->work, i))
 | 
								if (ISBSET(d->work, i))
 | 
				
			||||||
				for (ca = cnfa->states[i] + 1; ca->co != COLORLESS;
 | 
									for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
 | 
				
			||||||
					 ca++)
 | 
					 | 
				
			||||||
				{
 | 
									{
 | 
				
			||||||
					if (ca->co <= cnfa->ncolors)
 | 
										if (ca->co < cnfa->ncolors)
 | 
				
			||||||
						continue;		/* NOTE CONTINUE */
 | 
											continue;		/* NOTE CONTINUE */
 | 
				
			||||||
					sawlacons = 1;
 | 
										sawlacons = 1;
 | 
				
			||||||
					if (ISBSET(d->work, ca->to))
 | 
										if (ISBSET(d->work, ca->to))
 | 
				
			||||||
@@ -490,7 +489,7 @@ miss(struct vars * v,			/* used only for debug flags */
 | 
				
			|||||||
					dolacons = 1;
 | 
										dolacons = 1;
 | 
				
			||||||
					if (ca->to == cnfa->post)
 | 
										if (ca->to == cnfa->post)
 | 
				
			||||||
						ispost = 1;
 | 
											ispost = 1;
 | 
				
			||||||
					if (!cnfa->states[ca->to]->co)
 | 
										if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
 | 
				
			||||||
						noprogress = 0;
 | 
											noprogress = 0;
 | 
				
			||||||
					FDEBUG(("%d :> %d\n", i, ca->to));
 | 
										FDEBUG(("%d :> %d\n", i, ca->to));
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										259
									
								
								src/backend/regex/regprefix.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										259
									
								
								src/backend/regex/regprefix.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,259 @@
 | 
				
			|||||||
 | 
					/*-------------------------------------------------------------------------
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * regprefix.c
 | 
				
			||||||
 | 
					 *	  Extract a common prefix, if any, from a compiled regex.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Portions Copyright (c) 2012, PostgreSQL Global Development Group
 | 
				
			||||||
 | 
					 * Portions Copyright (c) 1998, 1999 Henry Spencer
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * IDENTIFICATION
 | 
				
			||||||
 | 
					 *	  src/backend/regex/regprefix.c
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *-------------------------------------------------------------------------
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "regex/regguts.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * forward declarations
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static int findprefix(struct cnfa * cnfa, struct colormap * cm,
 | 
				
			||||||
 | 
										  chr *string, size_t *slength);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * pg_regprefix - get common prefix for regular expression
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Returns one of:
 | 
				
			||||||
 | 
					 *	REG_NOMATCH: there is no common prefix of strings matching the regex
 | 
				
			||||||
 | 
					 *	REG_PREFIX: there is a common prefix of strings matching the regex
 | 
				
			||||||
 | 
					 *	REG_EXACT: all strings satisfying the regex must match the same string
 | 
				
			||||||
 | 
					 *	or a REG_XXX error code
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * In the non-failure cases, *string is set to a malloc'd string containing
 | 
				
			||||||
 | 
					 * the common prefix or exact value, of length *slength (measured in chrs
 | 
				
			||||||
 | 
					 * not bytes!).
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * This function does not analyze all complex cases (such as lookahead
 | 
				
			||||||
 | 
					 * constraints) exactly.  Therefore it is possible that some strings matching
 | 
				
			||||||
 | 
					 * the reported prefix or exact-match string do not satisfy the regex.  But
 | 
				
			||||||
 | 
					 * it should never be the case that a string satisfying the regex does not
 | 
				
			||||||
 | 
					 * match the reported prefix or exact-match string.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					int
 | 
				
			||||||
 | 
					pg_regprefix(regex_t *re,
 | 
				
			||||||
 | 
								 chr **string,
 | 
				
			||||||
 | 
								 size_t *slength)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct guts *g;
 | 
				
			||||||
 | 
						struct cnfa *cnfa;
 | 
				
			||||||
 | 
						int			st;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* sanity checks */
 | 
				
			||||||
 | 
						if (string == NULL || slength == NULL)
 | 
				
			||||||
 | 
							return REG_INVARG;
 | 
				
			||||||
 | 
						*string = NULL;				/* initialize for failure cases */
 | 
				
			||||||
 | 
						*slength = 0;
 | 
				
			||||||
 | 
						if (re == NULL || re->re_magic != REMAGIC)
 | 
				
			||||||
 | 
							return REG_INVARG;
 | 
				
			||||||
 | 
						if (re->re_csize != sizeof(chr))
 | 
				
			||||||
 | 
							return REG_MIXED;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Initialize locale-dependent support */
 | 
				
			||||||
 | 
						pg_set_regex_collation(re->re_collation);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* setup */
 | 
				
			||||||
 | 
						g = (struct guts *) re->re_guts;
 | 
				
			||||||
 | 
						if (g->info & REG_UIMPOSSIBLE)
 | 
				
			||||||
 | 
							return REG_NOMATCH;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * This implementation considers only the search NFA for the topmost regex
 | 
				
			||||||
 | 
						 * tree node.  Therefore, constraints such as backrefs are not fully
 | 
				
			||||||
 | 
						 * applied, which is allowed per the function's API spec.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						assert(g->tree != NULL);
 | 
				
			||||||
 | 
						cnfa = &g->tree->cnfa;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Since a correct NFA should never contain any exit-free loops, it should
 | 
				
			||||||
 | 
						 * not be possible for our traversal to return to a previously visited
 | 
				
			||||||
 | 
						 * NFA state.  Hence we need at most nstates chrs in the output string.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						*string = (chr *) MALLOC(cnfa->nstates * sizeof(chr));
 | 
				
			||||||
 | 
						if (*string == NULL)
 | 
				
			||||||
 | 
							return REG_ESPACE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* do it */
 | 
				
			||||||
 | 
						st = findprefix(cnfa, &g->cmap, *string, slength);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						assert(*slength <= cnfa->nstates);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* clean up */
 | 
				
			||||||
 | 
						if (st != REG_PREFIX && st != REG_EXACT)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							FREE(*string);
 | 
				
			||||||
 | 
							*string = NULL;
 | 
				
			||||||
 | 
							*slength = 0;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return st;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * findprefix - extract common prefix from cNFA
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Results are returned into the preallocated chr array string[], with
 | 
				
			||||||
 | 
					 * *slength (which must be preset to zero) incremented for each chr.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static int						/* regprefix return code */
 | 
				
			||||||
 | 
					findprefix(struct cnfa * cnfa,
 | 
				
			||||||
 | 
							   struct colormap * cm,
 | 
				
			||||||
 | 
							   chr *string,
 | 
				
			||||||
 | 
							   size_t *slength)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int			st;
 | 
				
			||||||
 | 
						int			nextst;
 | 
				
			||||||
 | 
						color		thiscolor;
 | 
				
			||||||
 | 
						chr			c;
 | 
				
			||||||
 | 
						struct carc *ca;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * The "pre" state must have only BOS/BOL outarcs, else pattern isn't
 | 
				
			||||||
 | 
						 * anchored left.  If we have both BOS and BOL, they must go to the
 | 
				
			||||||
 | 
						 * same next state.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						st = cnfa->pre;
 | 
				
			||||||
 | 
						nextst = -1;
 | 
				
			||||||
 | 
						for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							if (ca->co == cnfa->bos[0] || ca->co == cnfa->bos[1])
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								if (nextst == -1)
 | 
				
			||||||
 | 
									nextst = ca->to;
 | 
				
			||||||
 | 
								else if (nextst != ca->to)
 | 
				
			||||||
 | 
									return REG_NOMATCH;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
								return REG_NOMATCH;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (nextst == -1)
 | 
				
			||||||
 | 
							return REG_NOMATCH;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Scan through successive states, stopping as soon as we find one with
 | 
				
			||||||
 | 
						 * more than one acceptable transition character (either multiple colors
 | 
				
			||||||
 | 
						 * on out-arcs, or a color with more than one member chr).
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * We could find a state with multiple out-arcs that are all labeled with
 | 
				
			||||||
 | 
						 * the same singleton color; this comes from patterns like "^ab(cde|cxy)".
 | 
				
			||||||
 | 
						 * In that case we add the chr "c" to the output string but then exit the
 | 
				
			||||||
 | 
						 * loop with nextst == -1.  This leaves a little bit on the table: if the
 | 
				
			||||||
 | 
						 * pattern is like "^ab(cde|cdy)", we won't notice that "d" could be added
 | 
				
			||||||
 | 
						 * to the prefix.  But chasing multiple parallel state chains doesn't seem
 | 
				
			||||||
 | 
						 * worth the trouble.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						do
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							st = nextst;
 | 
				
			||||||
 | 
							nextst = -1;
 | 
				
			||||||
 | 
							thiscolor = COLORLESS;
 | 
				
			||||||
 | 
							for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								/* We ignore lookahead constraints */
 | 
				
			||||||
 | 
								if (ca->co >= cnfa->ncolors)
 | 
				
			||||||
 | 
									continue;
 | 
				
			||||||
 | 
								/* We can also ignore BOS/BOL arcs */
 | 
				
			||||||
 | 
								if (ca->co == cnfa->bos[0] || ca->co == cnfa->bos[1])
 | 
				
			||||||
 | 
									continue;
 | 
				
			||||||
 | 
								/* ... but EOS/EOL arcs terminate the search */
 | 
				
			||||||
 | 
								if (ca->co == cnfa->eos[0] || ca->co == cnfa->eos[1])
 | 
				
			||||||
 | 
								{
 | 
				
			||||||
 | 
									thiscolor = COLORLESS;
 | 
				
			||||||
 | 
									break;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								if (thiscolor == COLORLESS)
 | 
				
			||||||
 | 
								{
 | 
				
			||||||
 | 
									/* First plain outarc */
 | 
				
			||||||
 | 
									thiscolor = ca->co;
 | 
				
			||||||
 | 
									nextst = ca->to;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								else if (thiscolor == ca->co)
 | 
				
			||||||
 | 
								{
 | 
				
			||||||
 | 
									/* Another plain outarc for same color */
 | 
				
			||||||
 | 
									nextst = -1;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								else
 | 
				
			||||||
 | 
								{
 | 
				
			||||||
 | 
									/* More than one plain outarc color terminates the search */
 | 
				
			||||||
 | 
									thiscolor = COLORLESS;
 | 
				
			||||||
 | 
									break;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							/* Done if we didn't find exactly one color on plain outarcs */
 | 
				
			||||||
 | 
							if (thiscolor == COLORLESS)
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							/* The color must be a singleton */
 | 
				
			||||||
 | 
							if (cm->cd[thiscolor].nchrs != 1)
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * Identify the color's sole member chr and add it to the prefix
 | 
				
			||||||
 | 
							 * string.  In general the colormap data structure doesn't provide a
 | 
				
			||||||
 | 
							 * way to find color member chrs, except by trying GETCOLOR() on each
 | 
				
			||||||
 | 
							 * possible chr value, which won't do at all.  However, for the cases
 | 
				
			||||||
 | 
							 * we care about it should be sufficient to test the "firstchr" value,
 | 
				
			||||||
 | 
							 * that is the first chr ever added to the color.  There are cases
 | 
				
			||||||
 | 
							 * where this might no longer be a member of the color (so we do need
 | 
				
			||||||
 | 
							 * to test), but none of them are likely to arise for a character that
 | 
				
			||||||
 | 
							 * is a member of a common prefix.  If we do hit such a corner case,
 | 
				
			||||||
 | 
							 * we just fall out without adding anything to the prefix string.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							c = cm->cd[thiscolor].firstchr;
 | 
				
			||||||
 | 
							if (GETCOLOR(cm, c) != thiscolor)
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							string[(*slength)++] = c;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Advance to next state, but only if we have a unique next state */
 | 
				
			||||||
 | 
						} while (nextst != -1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * If we ended at a state that only has EOS/EOL outarcs leading to the
 | 
				
			||||||
 | 
						 * "post" state, then we have an exact-match string.  Note this is true
 | 
				
			||||||
 | 
						 * even if the string is of zero length.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						nextst = -1;
 | 
				
			||||||
 | 
						for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							if (ca->co == cnfa->eos[0] || ca->co == cnfa->eos[1])
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								if (nextst == -1)
 | 
				
			||||||
 | 
									nextst = ca->to;
 | 
				
			||||||
 | 
								else if (nextst != ca->to)
 | 
				
			||||||
 | 
								{
 | 
				
			||||||
 | 
									nextst = -1;
 | 
				
			||||||
 | 
									break;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								nextst = -1;
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (nextst == cnfa->post)
 | 
				
			||||||
 | 
							return REG_EXACT;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Otherwise, if we were unable to identify any prefix characters, say
 | 
				
			||||||
 | 
						 * NOMATCH --- the pattern is anchored left, but doesn't specify any
 | 
				
			||||||
 | 
						 * particular first character.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (*slength > 0)
 | 
				
			||||||
 | 
							return REG_PREFIX;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return REG_NOMATCH;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -1170,3 +1170,68 @@ build_regexp_split_result(regexp_matches_ctx *splitctx)
 | 
				
			|||||||
								   Int32GetDatum(startpos + 1));
 | 
													   Int32GetDatum(startpos + 1));
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * regexp_fixed_prefix - extract fixed prefix, if any, for a regexp
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * The result is NULL if there is no fixed prefix, else a palloc'd string.
 | 
				
			||||||
 | 
					 * If it is an exact match, not just a prefix, *exact is returned as TRUE.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					char *
 | 
				
			||||||
 | 
					regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation,
 | 
				
			||||||
 | 
										bool *exact)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						char	   *result;
 | 
				
			||||||
 | 
						regex_t    *re;
 | 
				
			||||||
 | 
						int			cflags;
 | 
				
			||||||
 | 
						int			re_result;
 | 
				
			||||||
 | 
						pg_wchar   *str;
 | 
				
			||||||
 | 
						size_t		slen;
 | 
				
			||||||
 | 
						size_t		maxlen;
 | 
				
			||||||
 | 
						char		errMsg[100];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						*exact = false;				/* default result */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Compile RE */
 | 
				
			||||||
 | 
						cflags = REG_ADVANCED;
 | 
				
			||||||
 | 
						if (case_insensitive)
 | 
				
			||||||
 | 
							cflags |= REG_ICASE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						re = RE_compile_and_cache(text_re, cflags, collation);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Examine it to see if there's a fixed prefix */
 | 
				
			||||||
 | 
						re_result = pg_regprefix(re, &str, &slen);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						switch (re_result)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							case REG_NOMATCH:
 | 
				
			||||||
 | 
								return NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							case REG_PREFIX:
 | 
				
			||||||
 | 
								/* continue with wchar conversion */
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							case REG_EXACT:
 | 
				
			||||||
 | 
								*exact = true;
 | 
				
			||||||
 | 
								/* continue with wchar conversion */
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							default:
 | 
				
			||||||
 | 
								/* re failed??? */
 | 
				
			||||||
 | 
								pg_regerror(re_result, re, errMsg, sizeof(errMsg));
 | 
				
			||||||
 | 
								ereport(ERROR,
 | 
				
			||||||
 | 
										(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
 | 
				
			||||||
 | 
										 errmsg("regular expression failed: %s", errMsg)));
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Convert pg_wchar result back to database encoding */
 | 
				
			||||||
 | 
						maxlen = pg_database_encoding_max_length() * slen + 1;
 | 
				
			||||||
 | 
						result = (char *) palloc(maxlen);
 | 
				
			||||||
 | 
						slen = pg_wchar2mb_with_len(str, result, slen);
 | 
				
			||||||
 | 
						Assert(slen < maxlen);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						free(str);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return result;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -189,7 +189,8 @@ static Selectivity prefix_selectivity(PlannerInfo *root,
 | 
				
			|||||||
static Selectivity like_selectivity(const char *patt, int pattlen,
 | 
					static Selectivity like_selectivity(const char *patt, int pattlen,
 | 
				
			||||||
									bool case_insensitive);
 | 
														bool case_insensitive);
 | 
				
			||||||
static Selectivity regex_selectivity(const char *patt, int pattlen,
 | 
					static Selectivity regex_selectivity(const char *patt, int pattlen,
 | 
				
			||||||
									 bool case_insensitive);
 | 
														 bool case_insensitive,
 | 
				
			||||||
 | 
														 int fixed_prefix_len);
 | 
				
			||||||
static Datum string_to_datum(const char *str, Oid datatype);
 | 
					static Datum string_to_datum(const char *str, Oid datatype);
 | 
				
			||||||
static Const *string_to_const(const char *str, Oid datatype);
 | 
					static Const *string_to_const(const char *str, Oid datatype);
 | 
				
			||||||
static Const *string_to_bytea_const(const char *str, size_t str_len);
 | 
					static Const *string_to_bytea_const(const char *str, size_t str_len);
 | 
				
			||||||
@@ -5013,18 +5014,9 @@ static Pattern_Prefix_Status
 | 
				
			|||||||
regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 | 
					regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 | 
				
			||||||
				   Const **prefix_const, Selectivity *rest_selec)
 | 
									   Const **prefix_const, Selectivity *rest_selec)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	char	   *match;
 | 
					 | 
				
			||||||
	int			pos,
 | 
					 | 
				
			||||||
				match_pos,
 | 
					 | 
				
			||||||
				prev_pos,
 | 
					 | 
				
			||||||
				prev_match_pos;
 | 
					 | 
				
			||||||
	bool		have_leading_paren;
 | 
					 | 
				
			||||||
	char	   *patt;
 | 
					 | 
				
			||||||
	char	   *rest;
 | 
					 | 
				
			||||||
	Oid			typeid = patt_const->consttype;
 | 
						Oid			typeid = patt_const->consttype;
 | 
				
			||||||
	bool		is_multibyte = (pg_database_encoding_max_length() > 1);
 | 
						char	   *prefix;
 | 
				
			||||||
	pg_locale_t locale = 0;
 | 
						bool		exact;
 | 
				
			||||||
	bool		locale_is_c = false;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Should be unnecessary, there are no bytea regex operators defined. As
 | 
						 * Should be unnecessary, there are no bytea regex operators defined. As
 | 
				
			||||||
@@ -5036,185 +5028,54 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 | 
				
			|||||||
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 | 
									(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 | 
				
			||||||
		 errmsg("regular-expression matching not supported on type bytea")));
 | 
							 errmsg("regular-expression matching not supported on type bytea")));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (case_insensitive)
 | 
						/* Use the regexp machinery to extract the prefix, if any */
 | 
				
			||||||
	{
 | 
						prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
 | 
				
			||||||
		/* If case-insensitive, we need locale info */
 | 
													 case_insensitive, collation,
 | 
				
			||||||
		if (lc_ctype_is_c(collation))
 | 
													 &exact);
 | 
				
			||||||
			locale_is_c = true;
 | 
					 | 
				
			||||||
		else if (collation != DEFAULT_COLLATION_OID)
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			if (!OidIsValid(collation))
 | 
					 | 
				
			||||||
			{
 | 
					 | 
				
			||||||
				/*
 | 
					 | 
				
			||||||
				 * This typically means that the parser could not resolve a
 | 
					 | 
				
			||||||
				 * conflict of implicit collations, so report it that way.
 | 
					 | 
				
			||||||
				 */
 | 
					 | 
				
			||||||
				ereport(ERROR,
 | 
					 | 
				
			||||||
						(errcode(ERRCODE_INDETERMINATE_COLLATION),
 | 
					 | 
				
			||||||
						 errmsg("could not determine which collation to use for regular expression"),
 | 
					 | 
				
			||||||
						 errhint("Use the COLLATE clause to set the collation explicitly.")));
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			locale = pg_newlocale_from_collation(collation);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* the right-hand const is type text for all of these */
 | 
						if (prefix == NULL)
 | 
				
			||||||
	patt = TextDatumGetCString(patt_const->constvalue);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Check for ARE director prefix.  It's worth our trouble to recognize
 | 
					 | 
				
			||||||
	 * this because similar_escape() used to use it, and some other code might
 | 
					 | 
				
			||||||
	 * still use it, to force ARE mode.
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	pos = 0;
 | 
					 | 
				
			||||||
	if (strncmp(patt, "***:", 4) == 0)
 | 
					 | 
				
			||||||
		pos = 4;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* Pattern must be anchored left */
 | 
					 | 
				
			||||||
	if (patt[pos] != '^')
 | 
					 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		*prefix_const = NULL;
 | 
							*prefix_const = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (rest_selec != NULL)
 | 
							if (rest_selec != NULL)
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								char   *patt = TextDatumGetCString(patt_const->constvalue);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			*rest_selec = regex_selectivity(patt, strlen(patt),
 | 
								*rest_selec = regex_selectivity(patt, strlen(patt),
 | 
				
			||||||
											case_insensitive);
 | 
																case_insensitive,
 | 
				
			||||||
 | 
																0);
 | 
				
			||||||
		return Pattern_Prefix_None;
 | 
								pfree(patt);
 | 
				
			||||||
	}
 | 
							}
 | 
				
			||||||
	pos++;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * If '|' is present in pattern, then there may be multiple alternatives
 | 
					 | 
				
			||||||
	 * for the start of the string.  (There are cases where this isn't so, for
 | 
					 | 
				
			||||||
	 * instance if the '|' is inside parens, but detecting that reliably is
 | 
					 | 
				
			||||||
	 * too hard.)
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	if (strchr(patt + pos, '|') != NULL)
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		*prefix_const = NULL;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if (rest_selec != NULL)
 | 
					 | 
				
			||||||
			*rest_selec = regex_selectivity(patt, strlen(patt),
 | 
					 | 
				
			||||||
											case_insensitive);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
		return Pattern_Prefix_None;
 | 
							return Pattern_Prefix_None;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* OK, allocate space for pattern */
 | 
						*prefix_const = string_to_const(prefix, typeid);
 | 
				
			||||||
	match = palloc(strlen(patt) + 1);
 | 
					 | 
				
			||||||
	prev_match_pos = match_pos = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * We special-case the syntax '^(...)$' because psql uses it.  But beware:
 | 
					 | 
				
			||||||
	 * sequences beginning "(?" are not what they seem, unless they're "(?:".
 | 
					 | 
				
			||||||
	 * (We must recognize that because of similar_escape().)
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	have_leading_paren = false;
 | 
					 | 
				
			||||||
	if (patt[pos] == '(' &&
 | 
					 | 
				
			||||||
		(patt[pos + 1] != '?' || patt[pos + 2] == ':'))
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		have_leading_paren = true;
 | 
					 | 
				
			||||||
		pos += (patt[pos + 1] != '?' ? 1 : 3);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* Scan remainder of pattern */
 | 
					 | 
				
			||||||
	prev_pos = pos;
 | 
					 | 
				
			||||||
	while (patt[pos])
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		int			len;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * Check for characters that indicate multiple possible matches here.
 | 
					 | 
				
			||||||
		 * Also, drop out at ')' or '$' so the termination test works right.
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		if (patt[pos] == '.' ||
 | 
					 | 
				
			||||||
			patt[pos] == '(' ||
 | 
					 | 
				
			||||||
			patt[pos] == ')' ||
 | 
					 | 
				
			||||||
			patt[pos] == '[' ||
 | 
					 | 
				
			||||||
			patt[pos] == '^' ||
 | 
					 | 
				
			||||||
			patt[pos] == '$')
 | 
					 | 
				
			||||||
			break;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		/* Stop if case-varying character (it's sort of a wildcard) */
 | 
					 | 
				
			||||||
		if (case_insensitive &&
 | 
					 | 
				
			||||||
		  pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
 | 
					 | 
				
			||||||
			break;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * Check for quantifiers.  Except for +, this means the preceding
 | 
					 | 
				
			||||||
		 * character is optional, so we must remove it from the prefix too!
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		if (patt[pos] == '*' ||
 | 
					 | 
				
			||||||
			patt[pos] == '?' ||
 | 
					 | 
				
			||||||
			patt[pos] == '{')
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			match_pos = prev_match_pos;
 | 
					 | 
				
			||||||
			pos = prev_pos;
 | 
					 | 
				
			||||||
			break;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		if (patt[pos] == '+')
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			pos = prev_pos;
 | 
					 | 
				
			||||||
			break;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * Normally, backslash quotes the next character.  But in AREs,
 | 
					 | 
				
			||||||
		 * backslash followed by alphanumeric is an escape, not a quoted
 | 
					 | 
				
			||||||
		 * character.  Must treat it as having multiple possible matches.
 | 
					 | 
				
			||||||
		 * Note: since only ASCII alphanumerics are escapes, we don't have to
 | 
					 | 
				
			||||||
		 * be paranoid about multibyte or collations here.
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		if (patt[pos] == '\\')
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			if (isalnum((unsigned char) patt[pos + 1]))
 | 
					 | 
				
			||||||
				break;
 | 
					 | 
				
			||||||
			pos++;
 | 
					 | 
				
			||||||
			if (patt[pos] == '\0')
 | 
					 | 
				
			||||||
				break;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		/* save position in case we need to back up on next loop cycle */
 | 
					 | 
				
			||||||
		prev_match_pos = match_pos;
 | 
					 | 
				
			||||||
		prev_pos = pos;
 | 
					 | 
				
			||||||
		/* must use encoding-aware processing here */
 | 
					 | 
				
			||||||
		len = pg_mblen(&patt[pos]);
 | 
					 | 
				
			||||||
		memcpy(&match[match_pos], &patt[pos], len);
 | 
					 | 
				
			||||||
		match_pos += len;
 | 
					 | 
				
			||||||
		pos += len;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	match[match_pos] = '\0';
 | 
					 | 
				
			||||||
	rest = &patt[pos];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (have_leading_paren && patt[pos] == ')')
 | 
					 | 
				
			||||||
		pos++;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (patt[pos] == '$' && patt[pos + 1] == '\0')
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		*prefix_const = string_to_const(match, typeid);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if (rest_selec != NULL)
 | 
					 | 
				
			||||||
			*rest_selec = 1.0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		pfree(patt);
 | 
					 | 
				
			||||||
		pfree(match);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		return Pattern_Prefix_Exact;	/* pattern specifies exact match */
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	*prefix_const = string_to_const(match, typeid);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (rest_selec != NULL)
 | 
						if (rest_selec != NULL)
 | 
				
			||||||
		*rest_selec = regex_selectivity(rest, strlen(rest),
 | 
						{
 | 
				
			||||||
										case_insensitive);
 | 
							if (exact)
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								/* Exact match, so there's no additional selectivity */
 | 
				
			||||||
 | 
								*rest_selec = 1.0;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								char   *patt = TextDatumGetCString(patt_const->constvalue);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pfree(patt);
 | 
								*rest_selec = regex_selectivity(patt, strlen(patt),
 | 
				
			||||||
	pfree(match);
 | 
																case_insensitive,
 | 
				
			||||||
 | 
																strlen(prefix));
 | 
				
			||||||
 | 
								pfree(patt);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (match_pos > 0)
 | 
						pfree(prefix);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (exact)
 | 
				
			||||||
 | 
							return Pattern_Prefix_Exact;	/* pattern specifies exact match */
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
		return Pattern_Prefix_Partial;
 | 
							return Pattern_Prefix_Partial;
 | 
				
			||||||
 | 
					 | 
				
			||||||
	return Pattern_Prefix_None;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Pattern_Prefix_Status
 | 
					Pattern_Prefix_Status
 | 
				
			||||||
@@ -5499,7 +5360,8 @@ regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static Selectivity
 | 
					static Selectivity
 | 
				
			||||||
regex_selectivity(const char *patt, int pattlen, bool case_insensitive)
 | 
					regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
 | 
				
			||||||
 | 
									  int fixed_prefix_len)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	Selectivity sel;
 | 
						Selectivity sel;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -5515,9 +5377,14 @@ regex_selectivity(const char *patt, int pattlen, bool case_insensitive)
 | 
				
			|||||||
		/* no trailing $ */
 | 
							/* no trailing $ */
 | 
				
			||||||
		sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
 | 
							sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
 | 
				
			||||||
		sel *= FULL_WILDCARD_SEL;
 | 
							sel *= FULL_WILDCARD_SEL;
 | 
				
			||||||
		if (sel > 1.0)
 | 
					 | 
				
			||||||
			sel = 1.0;
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* If there's a fixed prefix, discount its selectivity */
 | 
				
			||||||
 | 
						if (fixed_prefix_len > 0)
 | 
				
			||||||
 | 
							sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Make sure result stays in range */
 | 
				
			||||||
 | 
						CLAMP_PROBABILITY(sel);
 | 
				
			||||||
	return sel;
 | 
						return sel;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -156,6 +156,9 @@ typedef struct
 | 
				
			|||||||
/* two specials for debugging and testing */
 | 
					/* two specials for debugging and testing */
 | 
				
			||||||
#define REG_ATOI	101			/* convert error-code name to number */
 | 
					#define REG_ATOI	101			/* convert error-code name to number */
 | 
				
			||||||
#define REG_ITOA	102			/* convert error-code number to name */
 | 
					#define REG_ITOA	102			/* convert error-code number to name */
 | 
				
			||||||
 | 
					/* non-error result codes for pg_regprefix */
 | 
				
			||||||
 | 
					#define REG_PREFIX	(-1)		/* identified a common prefix */
 | 
				
			||||||
 | 
					#define REG_EXACT	(-2)		/* identified an exact match */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -164,6 +167,7 @@ typedef struct
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
extern int	pg_regcomp(regex_t *, const pg_wchar *, size_t, int, Oid);
 | 
					extern int	pg_regcomp(regex_t *, const pg_wchar *, size_t, int, Oid);
 | 
				
			||||||
extern int	pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int);
 | 
					extern int	pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int);
 | 
				
			||||||
 | 
					extern int	pg_regprefix(regex_t *, pg_wchar **, size_t *);
 | 
				
			||||||
extern void pg_regfree(regex_t *);
 | 
					extern void pg_regfree(regex_t *);
 | 
				
			||||||
extern size_t pg_regerror(int, const regex_t *, char *, size_t);
 | 
					extern size_t pg_regerror(int, const regex_t *, char *, size_t);
 | 
				
			||||||
extern void pg_set_regex_collation(Oid collation);
 | 
					extern void pg_set_regex_collation(Oid collation);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -188,6 +188,7 @@ struct colordesc
 | 
				
			|||||||
	color		sub;			/* open subcolor (if any); free chain ptr */
 | 
						color		sub;			/* open subcolor (if any); free chain ptr */
 | 
				
			||||||
#define  NOSUB	 COLORLESS
 | 
					#define  NOSUB	 COLORLESS
 | 
				
			||||||
	struct arc *arcs;			/* color chain */
 | 
						struct arc *arcs;			/* color chain */
 | 
				
			||||||
 | 
						chr			firstchr;		/* char first assigned to this color */
 | 
				
			||||||
	int			flags;
 | 
						int			flags;
 | 
				
			||||||
#define  FREECOL 01				/* currently free */
 | 
					#define  FREECOL 01				/* currently free */
 | 
				
			||||||
#define  PSEUDO  02				/* pseudocolor, no real chars */
 | 
					#define  PSEUDO  02				/* pseudocolor, no real chars */
 | 
				
			||||||
@@ -255,15 +256,14 @@ struct state;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
struct arc
 | 
					struct arc
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int			type;
 | 
						int			type;			/* 0 if free, else an NFA arc type code */
 | 
				
			||||||
#define  ARCFREE '\0'
 | 
					 | 
				
			||||||
	color		co;
 | 
						color		co;
 | 
				
			||||||
	struct state *from;			/* where it's from (and contained within) */
 | 
						struct state *from;			/* where it's from (and contained within) */
 | 
				
			||||||
	struct state *to;			/* where it's to */
 | 
						struct state *to;			/* where it's to */
 | 
				
			||||||
	struct arc *outchain;		/* *from's outs chain or free chain */
 | 
						struct arc *outchain;		/* link in *from's outs chain or free chain */
 | 
				
			||||||
#define  freechain	 outchain
 | 
					#define  freechain	 outchain
 | 
				
			||||||
	struct arc *inchain;		/* *to's ins chain */
 | 
						struct arc *inchain;		/* link in *to's ins chain */
 | 
				
			||||||
	struct arc *colorchain;		/* color's arc chain */
 | 
						struct arc *colorchain;		/* link in color's arc chain */
 | 
				
			||||||
	struct arc *colorchainRev;	/* back-link in color's arc chain */
 | 
						struct arc *colorchainRev;	/* back-link in color's arc chain */
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -315,24 +315,38 @@ struct nfa
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * definitions for compacted NFA
 | 
					 * definitions for compacted NFA
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * The main space savings in a compacted NFA is from making the arcs as small
 | 
				
			||||||
 | 
					 * as possible.  We store only the transition color and next-state number for
 | 
				
			||||||
 | 
					 * each arc.  The list of out arcs for each state is an array beginning at
 | 
				
			||||||
 | 
					 * cnfa.states[statenumber], and terminated by a dummy carc struct with
 | 
				
			||||||
 | 
					 * co == COLORLESS.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * The non-dummy carc structs are of two types: plain arcs and LACON arcs.
 | 
				
			||||||
 | 
					 * Plain arcs just store the transition color number as "co".  LACON arcs
 | 
				
			||||||
 | 
					 * store the lookahead constraint number plus cnfa.ncolors as "co".  LACON
 | 
				
			||||||
 | 
					 * arcs can be distinguished from plain by testing for co >= cnfa.ncolors.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
struct carc
 | 
					struct carc
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	color		co;				/* COLORLESS is list terminator */
 | 
						color		co;				/* COLORLESS is list terminator */
 | 
				
			||||||
	int			to;				/* state number */
 | 
						int			to;				/* next-state number */
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct cnfa
 | 
					struct cnfa
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int			nstates;		/* number of states */
 | 
						int			nstates;		/* number of states */
 | 
				
			||||||
	int			ncolors;		/* number of colors */
 | 
						int			ncolors;		/* number of colors (max color in use + 1) */
 | 
				
			||||||
	int			flags;
 | 
						int			flags;
 | 
				
			||||||
#define  HASLACONS	 01			/* uses lookahead constraints */
 | 
					#define  HASLACONS	01			/* uses lookahead constraints */
 | 
				
			||||||
	int			pre;			/* setup state number */
 | 
						int			pre;			/* setup state number */
 | 
				
			||||||
	int			post;			/* teardown state number */
 | 
						int			post;			/* teardown state number */
 | 
				
			||||||
	color		bos[2];			/* colors, if any, assigned to BOS and BOL */
 | 
						color		bos[2];			/* colors, if any, assigned to BOS and BOL */
 | 
				
			||||||
	color		eos[2];			/* colors, if any, assigned to EOS and EOL */
 | 
						color		eos[2];			/* colors, if any, assigned to EOS and EOL */
 | 
				
			||||||
 | 
						char	   *stflags;		/* vector of per-state flags bytes */
 | 
				
			||||||
 | 
					#define  CNFA_NOPROGRESS	01	/* flag bit for a no-progress state */
 | 
				
			||||||
	struct carc **states;		/* vector of pointers to outarc lists */
 | 
						struct carc **states;		/* vector of pointers to outarc lists */
 | 
				
			||||||
 | 
						/* states[n] are pointers into a single malloc'd array of arcs */
 | 
				
			||||||
	struct carc *arcs;			/* the area for the lists */
 | 
						struct carc *arcs;			/* the area for the lists */
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -544,6 +544,8 @@ extern Datum regexp_split_to_table(PG_FUNCTION_ARGS);
 | 
				
			|||||||
extern Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS);
 | 
					extern Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS);
 | 
				
			||||||
extern Datum regexp_split_to_array(PG_FUNCTION_ARGS);
 | 
					extern Datum regexp_split_to_array(PG_FUNCTION_ARGS);
 | 
				
			||||||
extern Datum regexp_split_to_array_no_flags(PG_FUNCTION_ARGS);
 | 
					extern Datum regexp_split_to_array_no_flags(PG_FUNCTION_ARGS);
 | 
				
			||||||
 | 
					extern char *regexp_fixed_prefix(text *text_re, bool case_insensitive,
 | 
				
			||||||
 | 
													 Oid collation, bool *exact);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* regproc.c */
 | 
					/* regproc.c */
 | 
				
			||||||
extern Datum regprocin(PG_FUNCTION_ARGS);
 | 
					extern Datum regprocin(PG_FUNCTION_ARGS);
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user