mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Improve similar_escape() in two different ways:
* Stop escaping ? and {.  As of SQL:2008, SIMILAR TO is defined to have a
POSIX-compatible interpretation of ? as well as {m,n} and related constructs,
so we should allow these things through to our regex engine.
* Escape ^ and $.  It appears that our regex engine will treat ^^ at the
beginning of the string the same as ^, and similarly for $$ at the end of
the string, which meant that SIMILAR TO was effectively ignoring ^ at the
start of the pattern and $ at the end.  Since these are not supposed to be
metacharacters, this is a bug.
The second part of this is arguably a back-patchable bug fix, but I'm
hesitant to do that because it might break applications that are expecting
something like "col SIMILAR TO '^foo$'" to work like a POSIX pattern.
It seems safer to change it only at a major version boundary.
Per discussion of an example from Doug Gorley.
			
			
This commit is contained in:
		@@ -1,4 +1,4 @@
 | 
				
			|||||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.488 2009/10/09 21:02:55 petere Exp $ -->
 | 
					<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.489 2009/10/10 03:50:15 tgl Exp $ -->
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 <chapter id="functions">
 | 
					 <chapter id="functions">
 | 
				
			||||||
  <title>Functions and Operators</title>
 | 
					  <title>Functions and Operators</title>
 | 
				
			||||||
@@ -3154,6 +3154,31 @@ cast(-44 as bit(12))           <lineannotation>111111010100</lineannotation>
 | 
				
			|||||||
      or more times.
 | 
					      or more times.
 | 
				
			||||||
     </para>
 | 
					     </para>
 | 
				
			||||||
    </listitem>
 | 
					    </listitem>
 | 
				
			||||||
 | 
					    <listitem>
 | 
				
			||||||
 | 
					     <para>
 | 
				
			||||||
 | 
					      <literal>?</literal> denotes repetition of the previous item zero
 | 
				
			||||||
 | 
					      or one time.
 | 
				
			||||||
 | 
					     </para>
 | 
				
			||||||
 | 
					    </listitem>
 | 
				
			||||||
 | 
					    <listitem>
 | 
				
			||||||
 | 
					     <para>
 | 
				
			||||||
 | 
					      <literal>{</><replaceable>m</><literal>}</literal> denotes repetition
 | 
				
			||||||
 | 
					      of the previous item exactly <replaceable>m</> times.
 | 
				
			||||||
 | 
					     </para>
 | 
				
			||||||
 | 
					    </listitem>
 | 
				
			||||||
 | 
					    <listitem>
 | 
				
			||||||
 | 
					     <para>
 | 
				
			||||||
 | 
					      <literal>{</><replaceable>m</><literal>,}</literal> denotes repetition
 | 
				
			||||||
 | 
					      of the previous item <replaceable>m</> or more times.
 | 
				
			||||||
 | 
					     </para>
 | 
				
			||||||
 | 
					    </listitem>
 | 
				
			||||||
 | 
					    <listitem>
 | 
				
			||||||
 | 
					     <para>
 | 
				
			||||||
 | 
					      <literal>{</><replaceable>m</><literal>,</><replaceable>n</><literal>}</>
 | 
				
			||||||
 | 
					      denotes repetition of the previous item at least <replaceable>m</> and
 | 
				
			||||||
 | 
					      not more than <replaceable>n</> times.
 | 
				
			||||||
 | 
					     </para>
 | 
				
			||||||
 | 
					    </listitem>
 | 
				
			||||||
    <listitem>
 | 
					    <listitem>
 | 
				
			||||||
     <para>
 | 
					     <para>
 | 
				
			||||||
      Parentheses <literal>()</literal> can be used to group items into
 | 
					      Parentheses <literal>()</literal> can be used to group items into
 | 
				
			||||||
@@ -3168,9 +3193,8 @@ cast(-44 as bit(12))           <lineannotation>111111010100</lineannotation>
 | 
				
			|||||||
    </listitem>
 | 
					    </listitem>
 | 
				
			||||||
   </itemizedlist>
 | 
					   </itemizedlist>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Notice that bounded repetition operators (<literal>?</> and
 | 
					    Notice that the period (<literal>.</>) is not a metacharacter
 | 
				
			||||||
    <literal>{...}</>) are not provided, though they exist in POSIX.
 | 
					    for <function>SIMILAR TO</>.
 | 
				
			||||||
    Also, the period (<literal>.</>) is not a metacharacter.
 | 
					 | 
				
			||||||
   </para>
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   <para>
 | 
					   <para>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -8,7 +8,7 @@
 | 
				
			|||||||
 *
 | 
					 *
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * IDENTIFICATION
 | 
					 * IDENTIFICATION
 | 
				
			||||||
 *	  $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.82 2009/06/11 14:49:04 momjian Exp $
 | 
					 *	  $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.83 2009/10/10 03:50:15 tgl Exp $
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *		Alistair Crooks added the code for the regex caching
 | 
					 *		Alistair Crooks added the code for the regex caching
 | 
				
			||||||
 *		agc - cached the regular expressions used - there's a good chance
 | 
					 *		agc - cached the regular expressions used - there's a good chance
 | 
				
			||||||
@@ -639,7 +639,7 @@ textregexreplace(PG_FUNCTION_ARGS)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * similar_escape()
 | 
					 * similar_escape()
 | 
				
			||||||
 * Convert a SQL99 regexp pattern to POSIX style, so it can be used by
 | 
					 * Convert a SQL:2008 regexp pattern to POSIX style, so it can be used by
 | 
				
			||||||
 * our regexp engine.
 | 
					 * our regexp engine.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
Datum
 | 
					Datum
 | 
				
			||||||
@@ -740,8 +740,8 @@ similar_escape(PG_FUNCTION_ARGS)
 | 
				
			|||||||
		}
 | 
							}
 | 
				
			||||||
		else if (pchar == '_')
 | 
							else if (pchar == '_')
 | 
				
			||||||
			*r++ = '.';
 | 
								*r++ = '.';
 | 
				
			||||||
		else if (pchar == '\\' || pchar == '.' || pchar == '?' ||
 | 
							else if (pchar == '\\' || pchar == '.' ||
 | 
				
			||||||
				 pchar == '{')
 | 
									 pchar == '^' || pchar == '$')
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
			*r++ = '\\';
 | 
								*r++ = '\\';
 | 
				
			||||||
			*r++ = pchar;
 | 
								*r++ = pchar;
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user