1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-27 12:41:57 +03:00

Move the guts of our Levenshtein implementation into core.

The hope is that we can use this to produce better diagnostics in
some cases.

Peter Geoghegan, reviewed by Michael Paquier, with some further
changes by me.
This commit is contained in:
Robert Haas
2014-11-13 12:25:10 -05:00
parent 1d69ae419d
commit c0828b78e9
6 changed files with 118 additions and 79 deletions

View File

@ -154,23 +154,6 @@ getcode(char c)
/* These prevent GH from becoming F */
#define NOGHTOF(c) (getcode(c) & 16) /* BDH */
/*
 * Report whether the first len bytes of s1 and s2 are identical.
 *
 * Bytes are compared starting from the last one (index len - 1) and moving
 * toward index 0; the first mismatch ends the scan with false.  When len is
 * zero or negative nothing is compared and the result is true.
 *
 * Faster than memcmp(), for this use case.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	int			remaining;

	for (remaining = len; remaining > 0; remaining--)
	{
		const int	idx = remaining - 1;

		if (s1[idx] != s2[idx])
			return false;
	}

	return true;
}
#include "levenshtein.c"
#define LEVENSHTEIN_LESS_EQUAL
#include "levenshtein.c"
PG_FUNCTION_INFO_V1(levenshtein_with_costs);
Datum
levenshtein_with_costs(PG_FUNCTION_ARGS)
@ -180,8 +163,20 @@ levenshtein_with_costs(PG_FUNCTION_ARGS)
int ins_c = PG_GETARG_INT32(2);
int del_c = PG_GETARG_INT32(3);
int sub_c = PG_GETARG_INT32(4);
const char *s_data;
const char *t_data;
int s_bytes,
t_bytes;
PG_RETURN_INT32(levenshtein_internal(src, dst, ins_c, del_c, sub_c));
/* Extract a pointer to the actual character data */
s_data = VARDATA_ANY(src);
t_data = VARDATA_ANY(dst);
/* Determine length of each string in bytes */
s_bytes = VARSIZE_ANY_EXHDR(src);
t_bytes = VARSIZE_ANY_EXHDR(dst);
PG_RETURN_INT32(varstr_levenshtein(s_data, s_bytes, t_data, t_bytes, ins_c,
del_c, sub_c));
}
@ -191,8 +186,20 @@ levenshtein(PG_FUNCTION_ARGS)
{
text *src = PG_GETARG_TEXT_PP(0);
text *dst = PG_GETARG_TEXT_PP(1);
const char *s_data;
const char *t_data;
int s_bytes,
t_bytes;
PG_RETURN_INT32(levenshtein_internal(src, dst, 1, 1, 1));
/* Extract a pointer to the actual character data */
s_data = VARDATA_ANY(src);
t_data = VARDATA_ANY(dst);
/* Determine length of each string in bytes */
s_bytes = VARSIZE_ANY_EXHDR(src);
t_bytes = VARSIZE_ANY_EXHDR(dst);
PG_RETURN_INT32(varstr_levenshtein(s_data, s_bytes, t_data, t_bytes, 1, 1,
1));
}
@ -206,8 +213,21 @@ levenshtein_less_equal_with_costs(PG_FUNCTION_ARGS)
int del_c = PG_GETARG_INT32(3);
int sub_c = PG_GETARG_INT32(4);
int max_d = PG_GETARG_INT32(5);
const char *s_data;
const char *t_data;
int s_bytes,
t_bytes;
PG_RETURN_INT32(levenshtein_less_equal_internal(src, dst, ins_c, del_c, sub_c, max_d));
/* Extract a pointer to the actual character data */
s_data = VARDATA_ANY(src);
t_data = VARDATA_ANY(dst);
/* Determine length of each string in bytes */
s_bytes = VARSIZE_ANY_EXHDR(src);
t_bytes = VARSIZE_ANY_EXHDR(dst);
PG_RETURN_INT32(varstr_levenshtein_less_equal(s_data, s_bytes, t_data,
t_bytes, ins_c, del_c,
sub_c, max_d));
}
@ -218,8 +238,20 @@ levenshtein_less_equal(PG_FUNCTION_ARGS)
text *src = PG_GETARG_TEXT_PP(0);
text *dst = PG_GETARG_TEXT_PP(1);
int max_d = PG_GETARG_INT32(2);
const char *s_data;
const char *t_data;
int s_bytes,
t_bytes;
PG_RETURN_INT32(levenshtein_less_equal_internal(src, dst, 1, 1, 1, max_d));
/* Extract a pointer to the actual character data */
s_data = VARDATA_ANY(src);
t_data = VARDATA_ANY(dst);
/* Determine length of each string in bytes */
s_bytes = VARSIZE_ANY_EXHDR(src);
t_bytes = VARSIZE_ANY_EXHDR(dst);
PG_RETURN_INT32(varstr_levenshtein_less_equal(s_data, s_bytes, t_data,
t_bytes, 1, 1, 1, max_d));
}