mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
Add support for Daitch-Mokotoff Soundex in contrib/fuzzystrmatch.
This modernized version of Soundex works significantly better than the original, particularly for non-English names. Dag Lem, reviewed by quite a few people along the way. Discussion: https://postgr.es/m/yger1atbgfy.fsf@sid.nimrod.no
This commit is contained in:
@ -19,3 +19,48 @@ SELECT metaphone('GUMBO', 4);
|
||||
|
||||
SELECT dmetaphone('gumbo');
|
||||
SELECT dmetaphone_alt('gumbo');
|
||||
|
||||
-- Vowels
|
||||
SELECT daitch_mokotoff('Augsburg');
|
||||
SELECT daitch_mokotoff('Breuer');
|
||||
SELECT daitch_mokotoff('Freud');
|
||||
|
||||
-- The letter "H"
|
||||
SELECT daitch_mokotoff('Halberstadt');
|
||||
SELECT daitch_mokotoff('Mannheim');
|
||||
|
||||
-- Adjacent sounds
|
||||
SELECT daitch_mokotoff('Chernowitz');
|
||||
|
||||
-- Adjacent letters with identical adjacent code digits
|
||||
SELECT daitch_mokotoff('Cherkassy');
|
||||
SELECT daitch_mokotoff('Kleinman');
|
||||
|
||||
-- More than one word
|
||||
SELECT daitch_mokotoff('Nowy Targ');
|
||||
|
||||
-- Padded with "0"
|
||||
SELECT daitch_mokotoff('Berlin');
|
||||
|
||||
-- Other examples from https://www.avotaynu.com/soundex.htm
|
||||
SELECT daitch_mokotoff('Ceniow');
|
||||
SELECT daitch_mokotoff('Tsenyuv');
|
||||
SELECT daitch_mokotoff('Holubica');
|
||||
SELECT daitch_mokotoff('Golubitsa');
|
||||
SELECT daitch_mokotoff('Przemysl');
|
||||
SELECT daitch_mokotoff('Pshemeshil');
|
||||
SELECT daitch_mokotoff('Rosochowaciec');
|
||||
SELECT daitch_mokotoff('Rosokhovatsets');
|
||||
|
||||
-- Ignored characters
|
||||
SELECT daitch_mokotoff('''OBrien');
|
||||
SELECT daitch_mokotoff('O''Brien');
|
||||
|
||||
-- "Difficult" cases, likely to cause trouble for other implementations.
|
||||
SELECT daitch_mokotoff('CJC');
|
||||
SELECT daitch_mokotoff('BESST');
|
||||
SELECT daitch_mokotoff('BOUEY');
|
||||
SELECT daitch_mokotoff('HANNMANN');
|
||||
SELECT daitch_mokotoff('MCCOYJR');
|
||||
SELECT daitch_mokotoff('ACCURSO');
|
||||
SELECT daitch_mokotoff('BIERSCHBACH');
|
||||
|
26
contrib/fuzzystrmatch/sql/fuzzystrmatch_utf8.sql
Normal file
26
contrib/fuzzystrmatch/sql/fuzzystrmatch_utf8.sql
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* This test must be run in a database with UTF-8 encoding,
|
||||
* because other encodings don't support all the characters used.
|
||||
*/
|
||||
|
||||
SELECT getdatabaseencoding() <> 'UTF8'
|
||||
AS skip_test \gset
|
||||
\if :skip_test
|
||||
\quit
|
||||
\endif
|
||||
|
||||
set client_encoding = utf8;
|
||||
|
||||
-- CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;
|
||||
|
||||
-- Accents
|
||||
SELECT daitch_mokotoff('Müller');
|
||||
SELECT daitch_mokotoff('Schäfer');
|
||||
SELECT daitch_mokotoff('Straßburg');
|
||||
SELECT daitch_mokotoff('Éregon');
|
||||
|
||||
-- Special characters added at https://www.jewishgen.org/InfoFiles/Soundex.html
|
||||
SELECT daitch_mokotoff('gąszczu');
|
||||
SELECT daitch_mokotoff('brzęczy');
|
||||
SELECT daitch_mokotoff('ţamas');
|
||||
SELECT daitch_mokotoff('țamas');
|
Reference in New Issue
Block a user