1
0
mirror of https://github.com/postgres/postgres.git synced 2025-12-19 17:02:53 +03:00

ltree: fix case-insensitive matching.

Previously, ltree_prefix_eq_ci() used lowercasing with the default
collation, while ltree_crc32_sz() used tolower() directly. These were
equivalent only if the default collation provider was libc and the
encoding was single-byte.

Change both to use casefolding with the default collation.

Backpatch through 18, where the casefolding APIs were introduced. The
bug exists in earlier versions, but would require some adaptation.

A REINDEX is required for ltree indexes where the database default
collation provider is not libc.

Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Backpatch-through: 18
Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
Discussion: https://postgr.es/m/01fc00fd66f641b9693d4f9f1af0ccf44cbdfbdf.camel@j-davis.com
This commit is contained in:
Jeff Davis
2025-12-16 11:13:17 -08:00
parent 24bf379cb1
commit 7f007e4a04
2 changed files with 76 additions and 13 deletions

View File

@@ -10,15 +10,45 @@
#include "postgres.h" #include "postgres.h"
#include "ltree.h" #include "ltree.h"
/*
 * TOLOWER(x): when LOWER_NODE is defined, maps x through tolower();
 * otherwise it expands to x unchanged.
 */
#ifdef LOWER_NODE
#include <ctype.h>
/* cast to unsigned char so tolower() is never handed a negative char value */
#define TOLOWER(x) tolower((unsigned char) (x))
#else
#define TOLOWER(x) (x)
#endif
#include "crc32.h" #include "crc32.h"
#include "utils/pg_crc.h" #include "utils/pg_crc.h"
#ifdef LOWER_NODE
#include "utils/pg_locale.h"
#endif
#ifdef LOWER_NODE
/*
 * ltree_crc32_sz (LOWER_NODE variant)
 *
 * Compute the traditional CRC-32 over the case-folded form of the "size"
 * bytes starting at "buf".  The input is walked in steps of pg_mblen()
 * bytes; each step is folded with pg_strfold() using the database locale,
 * and the folded bytes are fed into the running CRC.
 *
 * Returns the finished CRC as an unsigned int.
 */
unsigned int
ltree_crc32_sz(const char *buf, int size)
{
	/* database locale: fetched on the first call, reused afterwards */
	static pg_locale_t locale = NULL;
	pg_crc32	crc;
	const char *cur;
	int			remaining;
	int			chlen;

	if (locale == NULL)
		locale = pg_database_locale();

	INIT_TRADITIONAL_CRC32(crc);

	/*
	 * NOTE(review): chlen comes straight from pg_mblen() and is not checked
	 * against "remaining"; this presumably relies on callers passing text
	 * that is valid in the database encoding -- confirm.
	 */
	for (cur = buf, remaining = size;
		 remaining > 0;
		 cur += chlen, remaining -= chlen)
	{
		char		folded[UNICODE_CASEMAP_BUFSZ];
		size_t		nfolded;

		/* fold a single character per iteration */
		chlen = pg_mblen(cur);
		nfolded = pg_strfold(folded, UNICODE_CASEMAP_BUFSZ, cur, chlen,
							 locale);

		COMP_TRADITIONAL_CRC32(crc, folded, nfolded);
	}

	FIN_TRADITIONAL_CRC32(crc);

	return (unsigned int) crc;
}
#else
unsigned int unsigned int
ltree_crc32_sz(const char *buf, int size) ltree_crc32_sz(const char *buf, int size)
@@ -29,12 +59,12 @@ ltree_crc32_sz(const char *buf, int size)
INIT_TRADITIONAL_CRC32(crc); INIT_TRADITIONAL_CRC32(crc);
while (size > 0) while (size > 0)
{ {
char c = (char) TOLOWER(*p); COMP_TRADITIONAL_CRC32(crc, p, 1);
COMP_TRADITIONAL_CRC32(crc, &c, 1);
size--; size--;
p++; p++;
} }
FIN_TRADITIONAL_CRC32(crc); FIN_TRADITIONAL_CRC32(crc);
return (unsigned int) crc; return (unsigned int) crc;
} }
#endif /* !LOWER_NODE */

View File

@@ -93,11 +93,44 @@ ltree_prefix_eq(const char *a, size_t a_sz, const char *b, size_t b_sz)
bool bool
ltree_prefix_eq_ci(const char *a, size_t a_sz, const char *b, size_t b_sz) ltree_prefix_eq_ci(const char *a, size_t a_sz, const char *b, size_t b_sz)
{ {
char *al = str_tolower(a, a_sz, DEFAULT_COLLATION_OID); static pg_locale_t locale = NULL;
char *bl = str_tolower(b, b_sz, DEFAULT_COLLATION_OID); size_t al_sz = a_sz + 1;
size_t al_len;
char *al = palloc(al_sz);
size_t bl_sz = b_sz + 1;
size_t bl_len;
char *bl = palloc(bl_sz);
bool res; bool res;
res = (strncmp(al, bl, a_sz) == 0); if (!locale)
locale = pg_database_locale();
/* casefold both a and b */
al_len = pg_strfold(al, al_sz, a, a_sz, locale);
if (al_len + 1 > al_sz)
{
/* grow buffer if needed and retry */
al_sz = al_len + 1;
al = repalloc(al, al_sz);
al_len = pg_strfold(al, al_sz, a, a_sz, locale);
Assert(al_len + 1 <= al_sz);
}
bl_len = pg_strfold(bl, bl_sz, b, b_sz, locale);
if (bl_len + 1 > bl_sz)
{
/* grow buffer if needed and retry */
bl_sz = bl_len + 1;
bl = repalloc(bl, bl_sz);
bl_len = pg_strfold(bl, bl_sz, b, b_sz, locale);
Assert(bl_len + 1 <= bl_sz);
}
if (al_len > bl_len)
res = false;
else
res = (strncmp(al, bl, al_len) == 0);
pfree(al); pfree(al);
pfree(bl); pfree(bl);