1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-27 12:41:57 +03:00

Add support for Daitch-Mokotoff Soundex in contrib/fuzzystrmatch.

This modernized version of Soundex works significantly better than
the original, particularly for non-English names.

Dag Lem, reviewed by quite a few people along the way

Discussion: https://postgr.es/m/yger1atbgfy.fsf@sid.nimrod.no
This commit is contained in:
Tom Lane
2023-04-07 17:31:51 -04:00
parent 728015a470
commit a290378a37
13 changed files with 1315 additions and 11 deletions

View File

@ -19,3 +19,48 @@ SELECT metaphone('GUMBO', 4);
SELECT dmetaphone('gumbo');
SELECT dmetaphone_alt('gumbo');
-- Daitch-Mokotoff Soundex tests.
-- Each group below feeds daitch_mokotoff() inputs chosen to exercise the
-- rule or edge case named in the group's heading.
-- Vowels
SELECT daitch_mokotoff('Augsburg');
SELECT daitch_mokotoff('Breuer');
SELECT daitch_mokotoff('Freud');
-- The letter "H"
SELECT daitch_mokotoff('Halberstadt');
SELECT daitch_mokotoff('Mannheim');
-- Adjacent sounds
SELECT daitch_mokotoff('Chernowitz');
-- Adjacent letters with identical adjacent code digits
SELECT daitch_mokotoff('Cherkassy');
SELECT daitch_mokotoff('Kleinman');
-- More than one word
SELECT daitch_mokotoff('Nowy Targ');
-- Padded with "0"
SELECT daitch_mokotoff('Berlin');
-- Other examples from https://www.avotaynu.com/soundex.htm
-- NOTE(review): these look like pairs of alternative spellings of the same
-- name (e.g. Ceniow / Tsenyuv); presumably each pair is expected to share a
-- code -- confirm against the expected output.
SELECT daitch_mokotoff('Ceniow');
SELECT daitch_mokotoff('Tsenyuv');
SELECT daitch_mokotoff('Holubica');
SELECT daitch_mokotoff('Golubitsa');
SELECT daitch_mokotoff('Przemysl');
SELECT daitch_mokotoff('Pshemeshil');
SELECT daitch_mokotoff('Rosochowaciec');
SELECT daitch_mokotoff('Rosokhovatsets');
-- Ignored characters
-- ('' inside a string literal is one escaped apostrophe, so the inputs are
-- "'OBrien" with a leading apostrophe and "O'Brien" with an embedded one.)
SELECT daitch_mokotoff('''OBrien');
SELECT daitch_mokotoff('O''Brien');
-- "Difficult" cases, likely to cause trouble for other implementations.
SELECT daitch_mokotoff('CJC');
SELECT daitch_mokotoff('BESST');
SELECT daitch_mokotoff('BOUEY');
SELECT daitch_mokotoff('HANNMANN');
SELECT daitch_mokotoff('MCCOYJR');
SELECT daitch_mokotoff('ACCURSO');
SELECT daitch_mokotoff('BIERSCHBACH');