1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-27 12:41:57 +03:00

Add combining characters to unaccent.rules.

Strip certain classes of combining characters, so that accents encoded
this way are removed.

Author: Hugh Ranalli
Discussion: https://postgr.es/m/15548-cef1b3f8de190d4f%40postgresql.org
This commit is contained in:
Thomas Munro
2019-02-01 15:23:01 +01:00
parent 80579f9bb1
commit 456e3718e7
4 changed files with 157 additions and 1 deletions

View File

@ -9,13 +9,16 @@ SELECT unaccent('foobar');
SELECT unaccent('ёлка');
SELECT unaccent('ЁЖИК');
SELECT unaccent('˃˖˗˜');
SELECT unaccent(''); -- Remove combining diacritical 0x0300
SELECT unaccent('unaccent', 'foobar');
SELECT unaccent('unaccent', 'ёлка');
SELECT unaccent('unaccent', 'ЁЖИК');
SELECT unaccent('unaccent', '˃˖˗˜');
SELECT unaccent('unaccent', '');
SELECT ts_lexize('unaccent', 'foobar');
SELECT ts_lexize('unaccent', 'ёлка');
SELECT ts_lexize('unaccent', 'ЁЖИК');
SELECT ts_lexize('unaccent', '˃˖˗˜');
SELECT ts_lexize('unaccent', '');