mirror of
https://github.com/postgres/postgres.git
synced 2025-11-10 17:42:29 +03:00
Indexing support for pattern matching operations via separate operator
class when lc_collate is not C.
This commit is contained in:
@@ -14,7 +14,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/name.c,v 1.45 2003/05/09 21:19:49 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/name.c,v 1.46 2003/05/15 15:50:18 petere Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -182,6 +182,65 @@ namege(PG_FUNCTION_ARGS)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* comparison routines for LIKE indexing support
|
||||
*/
|
||||
|
||||
Datum
|
||||
name_pattern_eq(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Name arg1 = PG_GETARG_NAME(0);
|
||||
Name arg2 = PG_GETARG_NAME(1);
|
||||
|
||||
PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) == 0);
|
||||
}
|
||||
|
||||
Datum
|
||||
name_pattern_ne(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Name arg1 = PG_GETARG_NAME(0);
|
||||
Name arg2 = PG_GETARG_NAME(1);
|
||||
|
||||
PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) != 0);
|
||||
}
|
||||
|
||||
Datum
|
||||
name_pattern_lt(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Name arg1 = PG_GETARG_NAME(0);
|
||||
Name arg2 = PG_GETARG_NAME(1);
|
||||
|
||||
PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) < 0);
|
||||
}
|
||||
|
||||
Datum
|
||||
name_pattern_le(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Name arg1 = PG_GETARG_NAME(0);
|
||||
Name arg2 = PG_GETARG_NAME(1);
|
||||
|
||||
PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) <= 0);
|
||||
}
|
||||
|
||||
Datum
|
||||
name_pattern_gt(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Name arg1 = PG_GETARG_NAME(0);
|
||||
Name arg2 = PG_GETARG_NAME(1);
|
||||
|
||||
PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) > 0);
|
||||
}
|
||||
|
||||
Datum
|
||||
name_pattern_ge(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Name arg1 = PG_GETARG_NAME(0);
|
||||
Name arg2 = PG_GETARG_NAME(1);
|
||||
|
||||
PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) >= 0);
|
||||
}
|
||||
|
||||
|
||||
/* (see char.c for comparison/operation routines) */
|
||||
|
||||
int
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.136 2003/04/16 04:37:58 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.137 2003/05/15 15:50:18 petere Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -180,8 +180,6 @@ static void get_join_vars(List *args, Var **var1, Var **var2);
|
||||
static Selectivity prefix_selectivity(Query *root, Var *var, Oid vartype,
|
||||
Const *prefix);
|
||||
static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
|
||||
static bool string_lessthan(const char *str1, const char *str2,
|
||||
Oid datatype);
|
||||
static Oid find_operator(const char *opname, Oid datatype);
|
||||
static Datum string_to_datum(const char *str, Oid datatype);
|
||||
static Const *string_to_const(const char *str, Oid datatype);
|
||||
@@ -3619,51 +3617,21 @@ pattern_selectivity(Const *patt, Pattern_Type ptype)
|
||||
|
||||
|
||||
/*
|
||||
* We want to test whether the database's LC_COLLATE setting is safe for
|
||||
* LIKE/regexp index optimization.
|
||||
* Try to generate a string greater than the given string or any
|
||||
* string it is a prefix of. If successful, return a palloc'd string;
|
||||
* else return NULL.
|
||||
*
|
||||
* The key requirement here is that given a prefix string, say "foo",
|
||||
* we must be able to generate another string "fop" that is greater
|
||||
* than all strings "foobar" starting with "foo". Unfortunately, a
|
||||
* non-C locale may have arbitrary collation rules in which "fop" >
|
||||
* "foo" is not sufficient to ensure "fop" > "foobar". Until we can
|
||||
* come up with a more bulletproof way of generating the upper-bound
|
||||
* string, the optimization is disabled in all non-C locales.
|
||||
* than all strings "foobar" starting with "foo".
|
||||
*
|
||||
* (In theory, locales other than C may be LIKE-safe so this function
|
||||
* could be different from lc_collate_is_c(), but in a different
|
||||
* theory, non-C locales are completely unpredictable so it's unlikely
|
||||
* to happen.)
|
||||
* If we max out the righthand byte, truncate off the last character
|
||||
* and start incrementing the next. For example, if "z" were the last
|
||||
* character in the sort order, then we could produce "foo" as a
|
||||
* string greater than "fonz".
|
||||
*
|
||||
* Be sure to maintain the correspondence with the code in initdb.
|
||||
*/
|
||||
/*
 * locale_is_like_safe
 *
 * Reports whether the LIKE/regexp index optimization may be used.
 * The answer is simply whatever lc_collate_is_c() returns.
 */
bool
locale_is_like_safe(void)
{
	bool		like_safe = lc_collate_is_c();

	return like_safe;
}
|
||||
|
||||
/*
|
||||
* Try to generate a string greater than the given string or any string it is
|
||||
* a prefix of. If successful, return a palloc'd string; else return NULL.
|
||||
*
|
||||
* To work correctly in non-ASCII locales with weird collation orders,
|
||||
* we cannot simply increment "foo" to "fop" --- we have to check whether
|
||||
* we actually produced a string greater than the given one. If not,
|
||||
* increment the righthand byte again and repeat. If we max out the righthand
|
||||
* byte, truncate off the last character and start incrementing the next.
|
||||
* For example, if "z" were the last character in the sort order, then we
|
||||
* could produce "foo" as a string greater than "fonz".
|
||||
*
|
||||
* This could be rather slow in the worst case, but in most cases we won't
|
||||
* have to try more than one or two strings before succeeding.
|
||||
*
|
||||
* XXX this is actually not sufficient, since it only copes with the case
|
||||
* where individual characters collate in an order different from their
|
||||
* numeric code assignments. It does not handle cases where there are
|
||||
* cross-character effects, such as specially sorted digraphs, multiple
|
||||
* sort passes, etc. For now, we just shut down the whole thing in locales
|
||||
* that do such things :-(
|
||||
* This could be rather slow in the worst case, but in most cases we
|
||||
* won't have to try more than one or two strings before succeeding.
|
||||
*/
|
||||
Const *
|
||||
make_greater_string(const Const *str_const)
|
||||
@@ -3699,18 +3667,16 @@ make_greater_string(const Const *str_const)
|
||||
/*
|
||||
* Try to generate a larger string by incrementing the last byte.
|
||||
*/
|
||||
while (*lastchar < (unsigned char) 255)
|
||||
if (*lastchar < (unsigned char) 255)
|
||||
{
|
||||
(*lastchar)++;
|
||||
if (string_lessthan(str, workstr, datatype))
|
||||
{
|
||||
/* Success! */
|
||||
Const *workstr_const = string_to_const(workstr, datatype);
|
||||
Const *workstr_const;
|
||||
|
||||
pfree(str);
|
||||
pfree(workstr);
|
||||
return workstr_const;
|
||||
}
|
||||
(*lastchar)++;
|
||||
workstr_const = string_to_const(workstr, datatype);
|
||||
|
||||
pfree(str);
|
||||
pfree(workstr);
|
||||
return workstr_const;
|
||||
}
|
||||
|
||||
/* restore last byte so we don't confuse pg_mbcliplen */
|
||||
@@ -3736,57 +3702,6 @@ make_greater_string(const Const *str_const)
|
||||
return (Const *) NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Test whether two strings are "<" according to the rules of the given
|
||||
* datatype. We do this the hard way, ie, actually calling the type's
|
||||
* "<" operator function, to ensure we get the right result...
|
||||
*/
|
||||
static bool
|
||||
string_lessthan(const char *str1, const char *str2, Oid datatype)
|
||||
{
|
||||
Datum datum1 = string_to_datum(str1, datatype);
|
||||
Datum datum2 = string_to_datum(str2, datatype);
|
||||
bool result;
|
||||
|
||||
switch (datatype)
|
||||
{
|
||||
case TEXTOID:
|
||||
result = DatumGetBool(DirectFunctionCall2(text_lt,
|
||||
datum1, datum2));
|
||||
break;
|
||||
|
||||
case BPCHAROID:
|
||||
result = DatumGetBool(DirectFunctionCall2(bpcharlt,
|
||||
datum1, datum2));
|
||||
break;
|
||||
|
||||
case VARCHAROID:
|
||||
result = DatumGetBool(DirectFunctionCall2(varcharlt,
|
||||
datum1, datum2));
|
||||
break;
|
||||
|
||||
case NAMEOID:
|
||||
result = DatumGetBool(DirectFunctionCall2(namelt,
|
||||
datum1, datum2));
|
||||
break;
|
||||
|
||||
case BYTEAOID:
|
||||
result = DatumGetBool(DirectFunctionCall2(bytealt,
|
||||
datum1, datum2));
|
||||
break;
|
||||
|
||||
default:
|
||||
elog(ERROR, "string_lessthan: unexpected datatype %u", datatype);
|
||||
result = false;
|
||||
break;
|
||||
}
|
||||
|
||||
pfree(DatumGetPointer(datum1));
|
||||
pfree(DatumGetPointer(datum2));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* See if there is a binary op of the given name for the given datatype */
|
||||
/* NB: we assume that only built-in system operators are searched for */
|
||||
static Oid
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.97 2003/05/09 15:44:40 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.98 2003/05/15 15:50:19 petere Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -1050,6 +1050,149 @@ text_smaller(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_TEXT_P(result);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* The following operators support character-by-character comparison
|
||||
* of text data types, to allow building indexes suitable for LIKE
|
||||
* clauses.
|
||||
*/
|
||||
|
||||
static int
|
||||
internal_text_pattern_compare(text *arg1, text *arg2)
|
||||
{
|
||||
int result;
|
||||
|
||||
result = memcmp(VARDATA(arg1), VARDATA(arg2),
|
||||
Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
|
||||
if (result != 0)
|
||||
return result;
|
||||
else if (VARSIZE(arg1) < VARSIZE(arg2))
|
||||
return -1;
|
||||
else if (VARSIZE(arg1) > VARSIZE(arg2))
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
text_pattern_lt(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *arg1 = PG_GETARG_TEXT_P(0);
|
||||
text *arg2 = PG_GETARG_TEXT_P(1);
|
||||
int result;
|
||||
|
||||
result = internal_text_pattern_compare(arg1, arg2);
|
||||
|
||||
PG_FREE_IF_COPY(arg1, 0);
|
||||
PG_FREE_IF_COPY(arg2, 1);
|
||||
|
||||
PG_RETURN_BOOL(result < 0);
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
text_pattern_le(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *arg1 = PG_GETARG_TEXT_P(0);
|
||||
text *arg2 = PG_GETARG_TEXT_P(1);
|
||||
int result;
|
||||
|
||||
result = internal_text_pattern_compare(arg1, arg2);
|
||||
|
||||
PG_FREE_IF_COPY(arg1, 0);
|
||||
PG_FREE_IF_COPY(arg2, 1);
|
||||
|
||||
PG_RETURN_BOOL(result <= 0);
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
text_pattern_eq(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *arg1 = PG_GETARG_TEXT_P(0);
|
||||
text *arg2 = PG_GETARG_TEXT_P(1);
|
||||
int result;
|
||||
|
||||
if (VARSIZE(arg1) != VARSIZE(arg2))
|
||||
result = 1;
|
||||
else
|
||||
result = internal_text_pattern_compare(arg1, arg2);
|
||||
|
||||
PG_FREE_IF_COPY(arg1, 0);
|
||||
PG_FREE_IF_COPY(arg2, 1);
|
||||
|
||||
PG_RETURN_BOOL(result == 0);
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
text_pattern_ge(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *arg1 = PG_GETARG_TEXT_P(0);
|
||||
text *arg2 = PG_GETARG_TEXT_P(1);
|
||||
int result;
|
||||
|
||||
result = internal_text_pattern_compare(arg1, arg2);
|
||||
|
||||
PG_FREE_IF_COPY(arg1, 0);
|
||||
PG_FREE_IF_COPY(arg2, 1);
|
||||
|
||||
PG_RETURN_BOOL(result >= 0);
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
text_pattern_gt(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *arg1 = PG_GETARG_TEXT_P(0);
|
||||
text *arg2 = PG_GETARG_TEXT_P(1);
|
||||
int result;
|
||||
|
||||
result = internal_text_pattern_compare(arg1, arg2);
|
||||
|
||||
PG_FREE_IF_COPY(arg1, 0);
|
||||
PG_FREE_IF_COPY(arg2, 1);
|
||||
|
||||
PG_RETURN_BOOL(result > 0);
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
text_pattern_ne(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *arg1 = PG_GETARG_TEXT_P(0);
|
||||
text *arg2 = PG_GETARG_TEXT_P(1);
|
||||
int result;
|
||||
|
||||
if (VARSIZE(arg1) != VARSIZE(arg2))
|
||||
result = 1;
|
||||
else
|
||||
result = internal_text_pattern_compare(arg1, arg2);
|
||||
|
||||
PG_FREE_IF_COPY(arg1, 0);
|
||||
PG_FREE_IF_COPY(arg2, 1);
|
||||
|
||||
PG_RETURN_BOOL(result != 0);
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
bttext_pattern_cmp(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *arg1 = PG_GETARG_TEXT_P(0);
|
||||
text *arg2 = PG_GETARG_TEXT_P(1);
|
||||
int result;
|
||||
|
||||
result = internal_text_pattern_compare(arg1, arg2);
|
||||
|
||||
PG_FREE_IF_COPY(arg1, 0);
|
||||
PG_FREE_IF_COPY(arg2, 1);
|
||||
|
||||
PG_RETURN_INT32(result);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------------------
|
||||
* byteaoctetlen
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user