mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Fix documentation of regular expression character-entry escapes.
The docs claimed that \uhhhh would be interpreted as a Unicode value regardless of the database encoding, but it's never been implemented that way: \uhhhh and \xhhhh actually mean exactly the same thing, namely the character that pg_mb2wchar translates to 0xhhhh. Moreover, we were falsely dismissive of the usefulness of Unicode code points above FFFF. Fix that.

It's been like this for ages, so back-patch to all supported branches.
This commit is contained in:
		@@ -4417,7 +4417,7 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo;
 | 
				
			|||||||
       <entry> <literal>\e</> </entry>
 | 
					       <entry> <literal>\e</> </entry>
 | 
				
			||||||
       <entry> the character whose collating-sequence name
 | 
					       <entry> the character whose collating-sequence name
 | 
				
			||||||
       is <literal>ESC</>,
 | 
					       is <literal>ESC</>,
 | 
				
			||||||
       or failing that, the character with octal value 033 </entry>
 | 
					       or failing that, the character with octal value <literal>033</> </entry>
 | 
				
			||||||
       </row>
 | 
					       </row>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
       <row>
 | 
					       <row>
 | 
				
			||||||
@@ -4443,15 +4443,17 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo;
 | 
				
			|||||||
       <row>
 | 
					       <row>
 | 
				
			||||||
       <entry> <literal>\u</><replaceable>wxyz</> </entry>
 | 
					       <entry> <literal>\u</><replaceable>wxyz</> </entry>
 | 
				
			||||||
       <entry> (where <replaceable>wxyz</> is exactly four hexadecimal digits)
 | 
					       <entry> (where <replaceable>wxyz</> is exactly four hexadecimal digits)
 | 
				
			||||||
       the UTF16 (Unicode, 16-bit) character <literal>U+</><replaceable>wxyz</>
 | 
					       the character whose hexadecimal value is
 | 
				
			||||||
       in the local byte ordering </entry>
 | 
					       <literal>0x</><replaceable>wxyz</>
 | 
				
			||||||
 | 
					       </entry>
 | 
				
			||||||
       </row>
 | 
					       </row>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
       <row>
 | 
					       <row>
 | 
				
			||||||
       <entry> <literal>\U</><replaceable>stuvwxyz</> </entry>
 | 
					       <entry> <literal>\U</><replaceable>stuvwxyz</> </entry>
 | 
				
			||||||
       <entry> (where <replaceable>stuvwxyz</> is exactly eight hexadecimal
 | 
					       <entry> (where <replaceable>stuvwxyz</> is exactly eight hexadecimal
 | 
				
			||||||
       digits)
 | 
					       digits)
 | 
				
			||||||
       reserved for a hypothetical Unicode extension to 32 bits
 | 
					       the character whose hexadecimal value is
 | 
				
			||||||
 | 
					       <literal>0x</><replaceable>stuvwxyz</>
 | 
				
			||||||
       </entry>
 | 
					       </entry>
 | 
				
			||||||
       </row>
 | 
					       </row>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -4500,6 +4502,17 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo;
 | 
				
			|||||||
    Octal digits are <literal>0</>-<literal>7</>.
 | 
					    Octal digits are <literal>0</>-<literal>7</>.
 | 
				
			||||||
   </para>
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   <para>
 | 
				
			||||||
 | 
					    Numeric character-entry escapes specifying values outside the ASCII range
 | 
				
			||||||
 | 
					    (0-127) have meanings dependent on the database encoding.  When the
 | 
				
			||||||
 | 
					    encoding is UTF-8, escape values are equivalent to Unicode code points,
 | 
				
			||||||
 | 
					    for example <literal>\u1234</> means the character <literal>U+1234</>.
 | 
				
			||||||
 | 
					    For other multibyte encodings, character-entry escapes usually just
 | 
				
			||||||
 | 
					    specify the concatenation of the byte values for the character.  If the
 | 
				
			||||||
 | 
					    escape value does not correspond to any legal character in the database
 | 
				
			||||||
 | 
					    encoding, no error will be raised, but it will never match any data.
 | 
				
			||||||
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   <para>
 | 
					   <para>
 | 
				
			||||||
    The character-entry escapes are always taken as ordinary characters.
 | 
					    The character-entry escapes are always taken as ordinary characters.
 | 
				
			||||||
    For example, <literal>\135</> is <literal>]</> in ASCII, but
 | 
					    For example, <literal>\135</> is <literal>]</> in ASCII, but
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user