1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-18 12:22:09 +03:00

Adjust string comparison so that only bitwise-equal strings are considered
equal: if strcoll claims two strings are equal, check it with strcmp, and
sort according to strcmp if not identical.  This fixes inconsistent
behavior under glibc's hu_HU locale, and probably under some other locales
as well.  Also, take advantage of the now-well-defined behavior to speed up
texteq, textne, bpchareq, bpcharne: they may as well just do a bitwise
comparison and not bother with strcoll at all.

NOTE: affected databases may need to REINDEX indexes on text columns to be
sure they are self-consistent.
This commit is contained in:
Tom Lane
2005-12-22 22:50:14 +00:00
parent ba6e1b2e72
commit faefc812f8
2 changed files with 36 additions and 11 deletions

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.108 2004/12/31 22:01:22 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.108.4.1 2005/12/22 22:50:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -551,11 +551,14 @@ bpchareq(PG_FUNCTION_ARGS)
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
/* fast path for different-length inputs */
/*
* Since we only care about equality or not-equality, we can avoid all
* the expense of strcoll() here, and just do bitwise comparison.
*/
if (len1 != len2)
result = false;
else
result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) == 0);
result = (strncmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -575,11 +578,14 @@ bpcharne(PG_FUNCTION_ARGS)
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
/* fast path for different-length inputs */
/*
* Since we only care about equality or not-equality, we can avoid all
* the expense of strcoll() here, and just do bitwise comparison.
*/
if (len1 != len2)
result = true;
else
result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) != 0);
result = (strncmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -692,7 +698,9 @@ bpcharcmp(PG_FUNCTION_ARGS)
* bpchar needs a specialized hash function because we want to ignore
* trailing blanks in comparisons.
*
* XXX is there any need for locale-specific behavior here?
* Note: currently there is no need for locale-specific behavior here,
* but if we ever change the semantics of bpchar comparison to trust
* strcoll() completely, we'd need to do something different in non-C locales.
*/
Datum
hashbpchar(PG_FUNCTION_ARGS)

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.118 2004/12/31 22:01:22 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.118.4.1 2005/12/22 22:50:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -879,6 +879,15 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)
result = strcoll(a1p, a2p);
/*
* In some locales strcoll() can claim that nonidentical strings are
* equal. Believing that would be bad news for a number of reasons,
* so we follow Perl's lead and sort "equal" strings according to
* strcmp().
*/
if (result == 0)
result = strcmp(a1p, a2p);
if (len1 >= STACKBUFLEN)
pfree(a1p);
if (len2 >= STACKBUFLEN)
@@ -931,11 +940,15 @@ texteq(PG_FUNCTION_ARGS)
text *arg2 = PG_GETARG_TEXT_P(1);
bool result;
/* fast path for different-length inputs */
/*
* Since we only care about equality or not-equality, we can avoid all
* the expense of strcoll() here, and just do bitwise comparison.
*/
if (VARSIZE(arg1) != VARSIZE(arg2))
result = false;
else
result = (text_cmp(arg1, arg2) == 0);
result = (strncmp(VARDATA(arg1), VARDATA(arg2),
VARSIZE(arg1) - VARHDRSZ) == 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -950,11 +963,15 @@ textne(PG_FUNCTION_ARGS)
text *arg2 = PG_GETARG_TEXT_P(1);
bool result;
/* fast path for different-length inputs */
/*
* Since we only care about equality or not-equality, we can avoid all
* the expense of strcoll() here, and just do bitwise comparison.
*/
if (VARSIZE(arg1) != VARSIZE(arg2))
result = true;
else
result = (text_cmp(arg1, arg2) != 0);
result = (strncmp(VARDATA(arg1), VARDATA(arg2),
VARSIZE(arg1) - VARHDRSZ) != 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);