1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Add combining characters to unaccent.rules.

Strip certain classes of combining characters, so that accents encoded
this way are removed.

Author: Hugh Ranalli
Discussion: https://postgr.es/m/15548-cef1b3f8de190d4f%40postgresql.org
This commit is contained in:
Thomas Munro
2019-02-01 15:23:01 +01:00
parent 80579f9bb1
commit 456e3718e7
4 changed files with 157 additions and 1 deletion

View File

@@ -31,6 +31,12 @@ SELECT unaccent('˃˖˗˜');
>+-~
(1 row)
SELECT unaccent('À'); -- Remove combining diacritical 0x0300
unaccent
----------
A
(1 row)
SELECT unaccent('unaccent', 'foobar');
unaccent
----------
@@ -55,6 +61,12 @@ SELECT unaccent('unaccent', '˃˖˗˜');
>+-~
(1 row)
SELECT unaccent('unaccent', 'À');
unaccent
----------
A
(1 row)
SELECT ts_lexize('unaccent', 'foobar');
ts_lexize
-----------
@@ -79,3 +91,9 @@ SELECT ts_lexize('unaccent', '˃˖˗˜');
{>+-~}
(1 row)
SELECT ts_lexize('unaccent', 'À');
ts_lexize
-----------
{A}
(1 row)