1
0
mirror of https://github.com/postgres/postgres.git synced 2025-12-19 17:02:53 +03:00

ltree: fix case-insensitive matching.

Previously, ltree_prefix_eq_ci() used lowercasing with the default
collation, while ltree_crc32_sz() used tolower() directly. These were
equivalent only if the default collation provider was libc and the
encoding was single-byte.

Change both to use casefolding with the default collation.

Backpatch through 18, where the casefolding APIs were introduced. The
bug exists in earlier versions, but would require some adaptation.

A REINDEX is required for ltree indexes where the database default
collation is not libc.

Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Backpatch-through: 18
Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
Discussion: https://postgr.es/m/01fc00fd66f641b9693d4f9f1af0ccf44cbdfbdf.camel@j-davis.com
This commit is contained in:
Jeff Davis
2025-12-16 11:13:17 -08:00
parent f79e239e0b
commit 806555e300
3 changed files with 91 additions and 13 deletions

View File

@@ -10,15 +10,46 @@
#include "postgres.h"
#include "ltree.h"
#ifdef LOWER_NODE
#include <ctype.h>
#define TOLOWER(x) tolower((unsigned char) (x))
#else
#define TOLOWER(x) (x)
#endif
#include "crc32.h"
#include "utils/pg_crc.h"
#ifdef LOWER_NODE
#include "catalog/pg_collation.h"
#include "utils/pg_locale.h"
#endif
#ifdef LOWER_NODE
/*
 * ltree_crc32_sz (LOWER_NODE build)
 *
 * Compute the traditional CRC-32 over the case-folded form of the first
 * "size" bytes of "buf".
 *
 * The input is walked one multibyte character at a time: each character is
 * case-folded with the database default collation's locale and the folded
 * bytes are fed into the running CRC.
 *
 * The locale handle is looked up on the first call and kept in a
 * function-local static for later calls.
 */
unsigned int
ltree_crc32_sz(const char *buf, int size)
{
	static pg_locale_t fold_locale = NULL;
	pg_crc32	checksum;
	const char *cur;
	int			remaining;

	/* resolve the default-collation locale only once */
	if (fold_locale == NULL)
		fold_locale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);

	INIT_TRADITIONAL_CRC32(checksum);

	for (cur = buf, remaining = size; remaining > 0;)
	{
		/* scratch space sized for the folded form of a single codepoint */
		char		folded[UNICODE_CASEMAP_BUFSZ];
		int			chlen = pg_mblen(cur);
		size_t		nfolded;

		/* casefold exactly one input character into the scratch buffer */
		nfolded = pg_strfold(folded, sizeof(folded), cur, chlen, fold_locale);

		/* accumulate the folded bytes, not the raw input bytes */
		COMP_TRADITIONAL_CRC32(checksum, folded, nfolded);

		cur += chlen;
		remaining -= chlen;
	}

	FIN_TRADITIONAL_CRC32(checksum);

	return (unsigned int) checksum;
}
#else
unsigned int
ltree_crc32_sz(const char *buf, int size)
@@ -29,12 +60,12 @@ ltree_crc32_sz(const char *buf, int size)
INIT_TRADITIONAL_CRC32(crc);
while (size > 0)
{
char c = (char) TOLOWER(*p);
COMP_TRADITIONAL_CRC32(crc, &c, 1);
COMP_TRADITIONAL_CRC32(crc, p, 1);
size--;
p++;
}
FIN_TRADITIONAL_CRC32(crc);
return (unsigned int) crc;
}
#endif /* !LOWER_NODE */

View File

@@ -93,11 +93,44 @@ ltree_prefix_eq(const char *a, size_t a_sz, const char *b, size_t b_sz)
bool
ltree_prefix_eq_ci(const char *a, size_t a_sz, const char *b, size_t b_sz)
{
char *al = str_tolower(a, a_sz, DEFAULT_COLLATION_OID);
char *bl = str_tolower(b, b_sz, DEFAULT_COLLATION_OID);
static pg_locale_t locale = NULL;
size_t al_sz = a_sz + 1;
size_t al_len;
char *al = palloc(al_sz);
size_t bl_sz = b_sz + 1;
size_t bl_len;
char *bl = palloc(bl_sz);
bool res;
res = (strncmp(al, bl, a_sz) == 0);
if (!locale)
locale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
/* casefold both a and b */
al_len = pg_strfold(al, al_sz, a, a_sz, locale);
if (al_len + 1 > al_sz)
{
/* grow buffer if needed and retry */
al_sz = al_len + 1;
al = repalloc(al, al_sz);
al_len = pg_strfold(al, al_sz, a, a_sz, locale);
Assert(al_len + 1 <= al_sz);
}
bl_len = pg_strfold(bl, bl_sz, b, b_sz, locale);
if (bl_len + 1 > bl_sz)
{
/* grow buffer if needed and retry */
bl_sz = bl_len + 1;
bl = repalloc(bl, bl_sz);
bl_len = pg_strfold(bl, bl_sz, b, b_sz, locale);
Assert(bl_len + 1 <= bl_sz);
}
if (al_len > bl_len)
res = false;
else
res = (strncmp(al, bl, al_len) == 0);
pfree(al);
pfree(bl);

View File

@@ -24,6 +24,20 @@
/* use for libc locale names */
#define LOCALE_NAME_BUFLEN 128
/*
 * Sizing constants for case-mapping a single input codepoint. Useful for
 * mapping and processing a single input codepoint at a time with a
 * statically-allocated buffer.
 *
 * UNICODE_CASEMAP_LEN is the maximum number of output codepoints: with full
 * case mapping, an input codepoint may be mapped to as many as three output
 * codepoints. See Unicode 16.0.0, section 5.18.2, "Change in Length":
 *
 * https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-5/#G29675
 *
 * UNICODE_CASEMAP_BUFSZ is the corresponding buffer size: that many
 * codepoints times MAX_MULTIBYTE_CHAR_LEN (presumably the maximum encoded
 * length of one character, in bytes; defined elsewhere). Note that the
 * product itself adds no extra byte for a terminator.
 */
#define UNICODE_CASEMAP_LEN 3
#define UNICODE_CASEMAP_BUFSZ (UNICODE_CASEMAP_LEN * MAX_MULTIBYTE_CHAR_LEN)
/* GUC settings */
extern PGDLLIMPORT char *locale_messages;
extern PGDLLIMPORT char *locale_monetary;