mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-31 10:30:33 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			174 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			174 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * fuzzystrmatch.h
 | |
|  *
 | |
|  * Functions for "fuzzy" comparison of strings
 | |
|  *
 | |
|  * Joe Conway <mail@joeconway.com>
 | |
|  *
 | |
|  * $PostgreSQL: pgsql/contrib/fuzzystrmatch/fuzzystrmatch.h,v 1.16 2008/01/01 19:45:45 momjian Exp $
 | |
|  * Copyright (c) 2001-2008, PostgreSQL Global Development Group
 | |
|  * ALL RIGHTS RESERVED;
 | |
|  *
 | |
|  * levenshtein()
 | |
|  * -------------
 | |
|  * Written based on a description of the algorithm by Michael Gilleland
 | |
|  * found at http://www.merriampark.com/ld.htm
 | |
|  * Also looked at levenshtein.c in the PHP 4.0.6 distribution for
 | |
|  * inspiration.
 | |
|  *
 | |
|  * metaphone()
 | |
|  * -----------
 | |
|  * Modified for PostgreSQL by Joe Conway.
 | |
|  * Based on CPAN's "Text-Metaphone-1.96" by Michael G Schwern <schwern@pobox.com>
 | |
|  * Code slightly modified for use as PostgreSQL function (palloc, elog, etc).
 | |
|  * Metaphone was originally created by Lawrence Philips and presented in article
 | |
|  * in "Computer Language" December 1990 issue.
 | |
|  *
 | |
|  * Permission to use, copy, modify, and distribute this software and its
 | |
|  * documentation for any purpose, without fee, and without a written agreement
 | |
|  * is hereby granted, provided that the above copyright notice and this
 | |
|  * paragraph and the following two paragraphs appear in all copies.
 | |
|  *
 | |
|  * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR
 | |
|  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
 | |
|  * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
 | |
|  * DOCUMENTATION, EVEN IF THE AUTHOR OR DISTRIBUTORS HAVE BEEN ADVISED OF THE
 | |
|  * POSSIBILITY OF SUCH DAMAGE.
 | |
|  *
 | |
|  * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
 | |
|  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 | |
|  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 | |
|  * ON AN "AS IS" BASIS, AND THE AUTHOR AND DISTRIBUTORS HAS NO OBLIGATIONS TO
 | |
|  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 | |
|  *
 | |
|  */
 | |
| 
 | |
| #ifndef FUZZYSTRMATCH_H
 | |
| #define FUZZYSTRMATCH_H
 | |
| 
 | |
| #include "postgres.h"
 | |
| 
 | |
| #include <ctype.h>
 | |
| 
 | |
| #include "fmgr.h"
 | |
| #include "utils/builtins.h"
 | |
| 
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * External declarations
 | |
|  */
 | |
| extern Datum levenshtein(PG_FUNCTION_ARGS);
 | |
| extern Datum metaphone(PG_FUNCTION_ARGS);
 | |
| extern Datum soundex(PG_FUNCTION_ARGS);
 | |
| extern Datum difference(PG_FUNCTION_ARGS);
 | |
| 
 | |
| /*
 | |
|  * Soundex
 | |
|  */
 | |
| static void _soundex(const char *instr, char *outstr);
 | |
| 
 | |
| #define SOUNDEX_LEN 4
 | |
| #define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
 | |
| #define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
 | |
| 
 | |
| /*									ABCDEFGHIJKLMNOPQRSTUVWXYZ */
 | |
| static const char *soundex_table = "01230120022455012623010202";
 | |
| 
 | |
| #define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A']
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * Levenshtein
 | |
|  */
 | |
| #define MAX_LEVENSHTEIN_STRLEN		255
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * Metaphone
 | |
|  */
 | |
| #define MAX_METAPHONE_STRLEN		255
 | |
| 
 | |
| /*
 | |
|  * Original code by Michael G Schwern starts here.
 | |
|  * Code slightly modified for use as PostgreSQL
 | |
|  * function (combined *.h into here).
 | |
|  *------------------------------------------------------------------*/
 | |
| 
 | |
| /**************************************************************************
 | |
| 	metaphone -- Breaks english phrases down into their phonemes.
 | |
| 
 | |
| 	Input
 | |
| 		word			--	An english word to be phonized
 | |
| 		max_phonemes	--	How many phonemes to calculate.  If 0, then it
 | |
| 							will phonize the entire phrase.
 | |
| 		phoned_word		--	The final phonized word.  (We'll allocate the
 | |
| 							memory.)
 | |
| 	Output
 | |
| 		error	--	A simple error flag, returns TRUE or FALSE
 | |
| 
 | |
| 	NOTES:	ALL non-alpha characters are ignored, this includes whitespace,
 | |
| 	although non-alpha characters will break up phonemes.
 | |
| ****************************************************************************/
 | |
| 
 | |
| 
 | |
| /**************************************************************************
 | |
| 	my constants -- constants I like
 | |
| 
 | |
| 	Probably redundant.
 | |
| 
 | |
| ***************************************************************************/
 | |
| 
 | |
| #define META_ERROR			FALSE
 | |
| #define META_SUCCESS		TRUE
 | |
| #define META_FAILURE		FALSE
 | |
| 
 | |
| 
 | |
| /*	I add modifications to the traditional metaphone algorithm that you
 | |
| 	might find in books.  Define this if you want metaphone to behave
 | |
| 	traditionally */
 | |
| #undef USE_TRADITIONAL_METAPHONE
 | |
| 
 | |
| /* Special encodings */
 | |
| #define  SH		'X'
 | |
| #define  TH		'0'
 | |
| 
 | |
| char		Lookahead(char *word, int how_far);
 | |
| int
 | |
| _metaphone(
 | |
|  /* IN */
 | |
| 		   char *word,
 | |
| 		   int max_phonemes,
 | |
|  /* OUT */
 | |
| 		   char **phoned_word
 | |
| );
 | |
| 
 | |
| /* Metachar.h ... little bits about characters for metaphone */
 | |
| 
 | |
| 
 | |
| /*-- Character encoding array & accessing macros --*/
 | |
| /* Stolen directly out of the book... */
 | |
| char		_codes[26] = {
 | |
| 	1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0
 | |
| /*	a  b c	d e f g  h i j k l m n o p q r s t u v w x y z */
 | |
| };
 | |
| 
 | |
| 
 | |
| #define ENCODE(c) (isalpha((unsigned char) (c)) ? _codes[((toupper((unsigned char) (c))) - 'A')] : 0)
 | |
| 
 | |
| #define isvowel(c)	(ENCODE(c) & 1)		/* AEIOU */
 | |
| 
 | |
| /* These letters are passed through unchanged */
 | |
| #define NOCHANGE(c) (ENCODE(c) & 2)		/* FJMNR */
 | |
| 
 | |
| /* These form dipthongs when preceding H */
 | |
| #define AFFECTH(c)	(ENCODE(c) & 4)		/* CGPST */
 | |
| 
 | |
| /* These make C and G soft */
 | |
| #define MAKESOFT(c) (ENCODE(c) & 8)		/* EIY */
 | |
| 
 | |
| /* These prevent GH from becoming F */
 | |
| #define NOGHTOF(c)	(ENCODE(c) & 16)	/* BDH */
 | |
| 
 | |
| #endif   /* FUZZYSTRMATCH_H */
 |