mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Fix regexp substring matching (substring(string from pattern)) for the corner
case where there is a match to the pattern overall but the user has specified
a parenthesized subexpression and that subexpression hasn't got a match.
An example is substring('foo' from 'foo(bar)?').  This should return NULL,
since (bar) isn't matched, but it was mistakenly returning the whole-pattern
match instead (i.e., 'foo').  Per bug #4044 from Rui Martins.
This has been broken since the beginning; patch in all supported versions.
The old behavior was sufficiently inconsistent that it's impossible to believe
anyone is depending on it.
			
			
This commit is contained in:
		@@ -8,7 +8,7 @@
 | 
				
			|||||||
 *
 | 
					 *
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * IDENTIFICATION
 | 
					 * IDENTIFICATION
 | 
				
			||||||
 *	  $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.78 2008/01/01 19:45:52 momjian Exp $
 | 
					 *	  $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.79 2008/03/19 02:40:37 tgl Exp $
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *		Alistair Crooks added the code for the regex caching
 | 
					 *		Alistair Crooks added the code for the regex caching
 | 
				
			||||||
 *		agc - cached the regular expressions used - there's a good chance
 | 
					 *		agc - cached the regular expressions used - there's a good chance
 | 
				
			||||||
@@ -576,8 +576,13 @@ textregexsubstr(PG_FUNCTION_ARGS)
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
	text	   *s = PG_GETARG_TEXT_PP(0);
 | 
						text	   *s = PG_GETARG_TEXT_PP(0);
 | 
				
			||||||
	text	   *p = PG_GETARG_TEXT_PP(1);
 | 
						text	   *p = PG_GETARG_TEXT_PP(1);
 | 
				
			||||||
	bool		match;
 | 
						regex_t    *re;
 | 
				
			||||||
	regmatch_t	pmatch[2];
 | 
						regmatch_t	pmatch[2];
 | 
				
			||||||
 | 
						int			so,
 | 
				
			||||||
 | 
									eo;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Compile RE */
 | 
				
			||||||
 | 
						re = RE_compile_and_cache(p, regex_flavor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * We pass two regmatch_t structs to get info about the overall match and
 | 
						 * We pass two regmatch_t structs to get info about the overall match and
 | 
				
			||||||
@@ -585,34 +590,37 @@ textregexsubstr(PG_FUNCTION_ARGS)
 | 
				
			|||||||
	 * is a parenthesized subexpression, we return what it matched; else
 | 
						 * is a parenthesized subexpression, we return what it matched; else
 | 
				
			||||||
	 * return what the whole regexp matched.
 | 
						 * return what the whole regexp matched.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	match = RE_compile_and_execute(p,
 | 
						if (!RE_execute(re,
 | 
				
			||||||
								   VARDATA_ANY(s),
 | 
										VARDATA_ANY(s), VARSIZE_ANY_EXHDR(s),
 | 
				
			||||||
								   VARSIZE_ANY_EXHDR(s),
 | 
										2, pmatch))
 | 
				
			||||||
								   regex_flavor,
 | 
							PG_RETURN_NULL();		/* definitely no match */
 | 
				
			||||||
								   2, pmatch);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* match? then return the substring matching the pattern */
 | 
						if (re->re_nsub > 0)
 | 
				
			||||||
	if (match)
 | 
					 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		int			so,
 | 
							/* has parenthesized subexpressions, use the first one */
 | 
				
			||||||
					eo;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		so = pmatch[1].rm_so;
 | 
							so = pmatch[1].rm_so;
 | 
				
			||||||
		eo = pmatch[1].rm_eo;
 | 
							eo = pmatch[1].rm_eo;
 | 
				
			||||||
		if (so < 0 || eo < 0)
 | 
						}
 | 
				
			||||||
		{
 | 
						else
 | 
				
			||||||
			/* no parenthesized subexpression */
 | 
						{
 | 
				
			||||||
			so = pmatch[0].rm_so;
 | 
							/* no parenthesized subexpression, use whole match */
 | 
				
			||||||
			eo = pmatch[0].rm_eo;
 | 
							so = pmatch[0].rm_so;
 | 
				
			||||||
		}
 | 
							eo = pmatch[0].rm_eo;
 | 
				
			||||||
 | 
					 | 
				
			||||||
		return DirectFunctionCall3(text_substr,
 | 
					 | 
				
			||||||
								   PointerGetDatum(s),
 | 
					 | 
				
			||||||
								   Int32GetDatum(so + 1),
 | 
					 | 
				
			||||||
								   Int32GetDatum(eo - so));
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	PG_RETURN_NULL();
 | 
						/*
 | 
				
			||||||
 | 
						 * It is possible to have a match to the whole pattern but no match
 | 
				
			||||||
 | 
						 * for a subexpression; for example 'foo(bar)?' is considered to match
 | 
				
			||||||
 | 
						 * 'foo' but there is no subexpression match.  So this extra test for
 | 
				
			||||||
 | 
						 * match failure is not redundant.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (so < 0 || eo < 0)
 | 
				
			||||||
 | 
							PG_RETURN_NULL();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return DirectFunctionCall3(text_substr,
 | 
				
			||||||
 | 
												   PointerGetDatum(s),
 | 
				
			||||||
 | 
												   Int32GetDatum(so + 1),
 | 
				
			||||||
 | 
												   Int32GetDatum(eo - so));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user