1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-24 09:27:52 +03:00
Files
postgres/src/backend/access/nbtree/nbtcompare.c
Tom Lane 0dc6bf633a Allow btree comparison functions to return INT_MIN.
Historically we forbade datatype-specific comparison functions from
returning INT_MIN, so that it would be safe to invert the sort order
just by negating the comparison result.  However, this was never
really safe for comparison functions that directly return the result
of memcmp(), strcmp(), etc, as POSIX doesn't place any such restriction
on those library functions.  Buildfarm results show that at least on
recent Linux on s390x, memcmp() actually does return INT_MIN sometimes,
causing sort failures.

The agreed-on answer is to remove this restriction and fix relevant
call sites to not make such an assumption; code such as "res = -res"
should be replaced by "INVERT_COMPARE_RESULT(res)".  The same is needed
in a few places that just directly negated the result of memcmp or
strcmp.

To help find places having this problem, I've also added a compile option
to nbtcompare.c that causes some of the commonly used comparators to
return INT_MIN/INT_MAX instead of their usual -1/+1.  It'd likely be
a good idea to have at least one buildfarm member running with
"-DSTRESS_SORT_INT_MIN".  That's far from a complete test of course,
but it should help to prevent fresh introductions of such bugs.

This is a longstanding portability hazard, so back-patch to all supported
branches.

Discussion: https://postgr.es/m/20180928185215.ffoq2xrq5d3pafna@alap3.anarazel.de
2018-10-05 16:01:30 -04:00

363 lines
7.7 KiB
C

/*-------------------------------------------------------------------------
*
* nbtcompare.c
* Comparison functions for btree access method.
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/access/nbtree/nbtcompare.c
*
* NOTES
*
* These functions are stored in pg_amproc. For each operator class
* defined on btrees, they compute
*
* compare(a, b):
* < 0 if a < b,
* = 0 if a == b,
* > 0 if a > b.
*
* The result is always an int32 regardless of the input datatype.
*
* Although any negative int32 is acceptable for reporting "<",
* and any positive int32 is acceptable for reporting ">", routines
* that work on 32-bit or wider datatypes can't just return "a - b".
* That could overflow and give the wrong answer.
*
* NOTE: it is critical that the comparison function impose a total order
* on all non-NULL values of the data type, and that the datatype's
* boolean comparison operators (= < >= etc) yield results consistent
* with the comparison routine. Otherwise bad behavior may ensue.
* (For example, the comparison operators must NOT punt when faced with
* NAN or other funny values; you must devise some collation sequence for
* all such values.) If the datatype is not trivial, this is most
* reliably done by having the boolean operators invoke the same
* three-way comparison code that the btree function does. Therefore,
* this file contains only btree support for "trivial" datatypes ---
* all others are in the /utils/adt/ files that implement their datatypes.
*
* NOTE: these routines must not leak memory, since memory allocated
* during an index access won't be recovered till end of query. This
* primarily affects comparison routines for toastable datatypes;
* they have to be careful to free any detoasted copy of an input datum.
*
* NOTE: we used to forbid comparison functions from returning INT_MIN,
* but that proves to be too error-prone because some platforms' versions
* of memcmp() etc can return INT_MIN. As a means of stress-testing
* callers, this file can be compiled with STRESS_SORT_INT_MIN defined
* to cause many of these functions to return INT_MIN or INT_MAX instead of
* their customary -1/+1. For production, though, that's not a good idea
* since users or third-party code might expect the traditional results.
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <limits.h>
#include "utils/builtins.h"
#include "utils/sortsupport.h"
/*
 * Result codes returned by the integer and OID comparators in this file.
 *
 * A normal build uses the customary -1/+1.  Building with
 * STRESS_SORT_INT_MIN defined substitutes INT_MIN/INT_MAX instead, which
 * stress-tests callers that might wrongly negate a comparison result.
 */
#ifndef STRESS_SORT_INT_MIN
#define A_LESS_THAN_B		(-1)
#define A_GREATER_THAN_B	1
#else
#define A_LESS_THAN_B		INT_MIN
#define A_GREATER_THAN_B	INT_MAX
#endif
Datum
btboolcmp(PG_FUNCTION_ARGS)
{
bool a = PG_GETARG_BOOL(0);
bool b = PG_GETARG_BOOL(1);
PG_RETURN_INT32((int32) a - (int32) b);
}
Datum
btint2cmp(PG_FUNCTION_ARGS)
{
int16 a = PG_GETARG_INT16(0);
int16 b = PG_GETARG_INT16(1);
PG_RETURN_INT32((int32) a - (int32) b);
}
/*
 * btint2fastcmp
 *		Comparator installed by btint2sortsupport.
 *
 * Works directly on Datums holding int16 values and returns their
 * difference computed in int.  The ssup argument is not used.
 */
static int
btint2fastcmp(Datum x, Datum y, SortSupport ssup)
{
	int			lhs = (int) DatumGetInt16(x);
	int			rhs = (int) DatumGetInt16(y);

	return lhs - rhs;
}
/*
 * btint2sortsupport
 *		Fill in the SortSupport struct passed as argument 0 so that it
 *		uses btint2fastcmp as its comparator.
 */
Datum
btint2sortsupport(PG_FUNCTION_ARGS)
{
	SortSupport sortinfo = (SortSupport) PG_GETARG_POINTER(0);

	sortinfo->comparator = btint2fastcmp;

	PG_RETURN_VOID();
}
/*
 * btint4cmp
 *		btree three-way comparison for int4.
 *
 * Uses explicit comparisons rather than subtraction, since "a - b" could
 * overflow for 32-bit operands.  Returns A_LESS_THAN_B, 0, or
 * A_GREATER_THAN_B.
 */
Datum
btint4cmp(PG_FUNCTION_ARGS)
{
	int32		lhs = PG_GETARG_INT32(0);
	int32		rhs = PG_GETARG_INT32(1);
	int32		result;

	if (lhs == rhs)
		result = 0;
	else
		result = (lhs > rhs) ? A_GREATER_THAN_B : A_LESS_THAN_B;

	PG_RETURN_INT32(result);
}
/*
 * btint4fastcmp
 *		Comparator installed by btint4sortsupport.
 *
 * Compares two Datums holding int32 values and returns A_LESS_THAN_B, 0,
 * or A_GREATER_THAN_B.  The ssup argument is not used.
 */
static int
btint4fastcmp(Datum x, Datum y, SortSupport ssup)
{
	int32		lhs = DatumGetInt32(x);
	int32		rhs = DatumGetInt32(y);

	if (lhs < rhs)
		return A_LESS_THAN_B;
	if (lhs > rhs)
		return A_GREATER_THAN_B;
	return 0;
}
/*
 * btint4sortsupport
 *		Fill in the SortSupport struct passed as argument 0 so that it
 *		uses btint4fastcmp as its comparator.
 */
Datum
btint4sortsupport(PG_FUNCTION_ARGS)
{
	SortSupport sortinfo = (SortSupport) PG_GETARG_POINTER(0);

	sortinfo->comparator = btint4fastcmp;

	PG_RETURN_VOID();
}
/*
 * btint8cmp
 *		btree three-way comparison for int8.
 *
 * Uses explicit comparisons rather than subtraction, which could
 * overflow.  Returns A_LESS_THAN_B, 0, or A_GREATER_THAN_B.
 */
Datum
btint8cmp(PG_FUNCTION_ARGS)
{
	int64		lhs = PG_GETARG_INT64(0);
	int64		rhs = PG_GETARG_INT64(1);

	if (lhs < rhs)
		PG_RETURN_INT32(A_LESS_THAN_B);
	else if (lhs > rhs)
		PG_RETURN_INT32(A_GREATER_THAN_B);
	else
		PG_RETURN_INT32(0);
}
/*
 * btint8fastcmp
 *		Comparator installed by btint8sortsupport.
 *
 * Compares two Datums holding int64 values and returns A_LESS_THAN_B, 0,
 * or A_GREATER_THAN_B.  The ssup argument is not used.
 */
static int
btint8fastcmp(Datum x, Datum y, SortSupport ssup)
{
	int64		lhs = DatumGetInt64(x);
	int64		rhs = DatumGetInt64(y);

	if (lhs == rhs)
		return 0;

	return (lhs > rhs) ? A_GREATER_THAN_B : A_LESS_THAN_B;
}
/*
 * btint8sortsupport
 *		Fill in the SortSupport struct passed as argument 0 so that it
 *		uses btint8fastcmp as its comparator.
 */
Datum
btint8sortsupport(PG_FUNCTION_ARGS)
{
	SortSupport sortinfo = (SortSupport) PG_GETARG_POINTER(0);

	sortinfo->comparator = btint8fastcmp;

	PG_RETURN_VOID();
}
Datum
btint48cmp(PG_FUNCTION_ARGS)
{
int32 a = PG_GETARG_INT32(0);
int64 b = PG_GETARG_INT64(1);
if (a > b)
PG_RETURN_INT32(A_GREATER_THAN_B);
else if (a == b)
PG_RETURN_INT32(0);
else
PG_RETURN_INT32(A_LESS_THAN_B);
}
Datum
btint84cmp(PG_FUNCTION_ARGS)
{
int64 a = PG_GETARG_INT64(0);
int32 b = PG_GETARG_INT32(1);
if (a > b)
PG_RETURN_INT32(A_GREATER_THAN_B);
else if (a == b)
PG_RETURN_INT32(0);
else
PG_RETURN_INT32(A_LESS_THAN_B);
}
Datum
btint24cmp(PG_FUNCTION_ARGS)
{
int16 a = PG_GETARG_INT16(0);
int32 b = PG_GETARG_INT32(1);
if (a > b)
PG_RETURN_INT32(A_GREATER_THAN_B);
else if (a == b)
PG_RETURN_INT32(0);
else
PG_RETURN_INT32(A_LESS_THAN_B);
}
Datum
btint42cmp(PG_FUNCTION_ARGS)
{
int32 a = PG_GETARG_INT32(0);
int16 b = PG_GETARG_INT16(1);
if (a > b)
PG_RETURN_INT32(A_GREATER_THAN_B);
else if (a == b)
PG_RETURN_INT32(0);
else
PG_RETURN_INT32(A_LESS_THAN_B);
}
Datum
btint28cmp(PG_FUNCTION_ARGS)
{
int16 a = PG_GETARG_INT16(0);
int64 b = PG_GETARG_INT64(1);
if (a > b)
PG_RETURN_INT32(A_GREATER_THAN_B);
else if (a == b)
PG_RETURN_INT32(0);
else
PG_RETURN_INT32(A_LESS_THAN_B);
}
Datum
btint82cmp(PG_FUNCTION_ARGS)
{
int64 a = PG_GETARG_INT64(0);
int16 b = PG_GETARG_INT16(1);
if (a > b)
PG_RETURN_INT32(A_GREATER_THAN_B);
else if (a == b)
PG_RETURN_INT32(0);
else
PG_RETURN_INT32(A_LESS_THAN_B);
}
Datum
btoidcmp(PG_FUNCTION_ARGS)
{
Oid a = PG_GETARG_OID(0);
Oid b = PG_GETARG_OID(1);
if (a > b)
PG_RETURN_INT32(A_GREATER_THAN_B);
else if (a == b)
PG_RETURN_INT32(0);
else
PG_RETURN_INT32(A_LESS_THAN_B);
}
/*
 * btoidfastcmp
 *		Comparator installed by btoidsortsupport.
 *
 * Compares two Datums holding Oid values and returns A_LESS_THAN_B, 0,
 * or A_GREATER_THAN_B.  The ssup argument is not used.
 */
static int
btoidfastcmp(Datum x, Datum y, SortSupport ssup)
{
	Oid			lhs = DatumGetObjectId(x);
	Oid			rhs = DatumGetObjectId(y);

	if (lhs < rhs)
		return A_LESS_THAN_B;
	if (lhs > rhs)
		return A_GREATER_THAN_B;
	return 0;
}
/*
 * btoidsortsupport
 *		Fill in the SortSupport struct passed as argument 0 so that it
 *		uses btoidfastcmp as its comparator.
 */
Datum
btoidsortsupport(PG_FUNCTION_ARGS)
{
	SortSupport sortinfo = (SortSupport) PG_GETARG_POINTER(0);

	sortinfo->comparator = btoidfastcmp;

	PG_RETURN_VOID();
}
Datum
btoidvectorcmp(PG_FUNCTION_ARGS)
{
oidvector *a = (oidvector *) PG_GETARG_POINTER(0);
oidvector *b = (oidvector *) PG_GETARG_POINTER(1);
int i;
/* We arbitrarily choose to sort first by vector length */
if (a->dim1 != b->dim1)
PG_RETURN_INT32(a->dim1 - b->dim1);
for (i = 0; i < a->dim1; i++)
{
if (a->values[i] != b->values[i])
{
if (a->values[i] > b->values[i])
PG_RETURN_INT32(A_GREATER_THAN_B);
else
PG_RETURN_INT32(A_LESS_THAN_B);
}
}
PG_RETURN_INT32(0);
}
Datum
btcharcmp(PG_FUNCTION_ARGS)
{
char a = PG_GETARG_CHAR(0);
char b = PG_GETARG_CHAR(1);
/* Be careful to compare chars as unsigned */
PG_RETURN_INT32((int32) ((uint8) a) - (int32) ((uint8) b));
}
/*
 * btnamecmp
 *		btree three-way comparison for name.
 *
 * Returns the strncmp() result for the two name strings, comparing at
 * most NAMEDATALEN bytes.  The value is passed through unchanged, so it
 * may be any negative or positive int, not just -1/+1.
 */
Datum
btnamecmp(PG_FUNCTION_ARGS)
{
	Name		left = PG_GETARG_NAME(0);
	Name		right = PG_GETARG_NAME(1);
	int			cmp;

	cmp = strncmp(NameStr(*left), NameStr(*right), NAMEDATALEN);

	PG_RETURN_INT32(cmp);
}
/*
 * btnamefastcmp
 *		Comparator installed by btnamesortsupport.
 *
 * Returns the strncmp() result for the two name Datums, comparing at
 * most NAMEDATALEN bytes.  The ssup argument is not used.
 */
static int
btnamefastcmp(Datum x, Datum y, SortSupport ssup)
{
	Name		left = DatumGetName(x);
	Name		right = DatumGetName(y);
	int			cmp;

	cmp = strncmp(NameStr(*left), NameStr(*right), NAMEDATALEN);

	return cmp;
}
/*
 * btnamesortsupport
 *		Fill in the SortSupport struct passed as argument 0 so that it
 *		uses btnamefastcmp as its comparator.
 */
Datum
btnamesortsupport(PG_FUNCTION_ARGS)
{
	SortSupport sortinfo = (SortSupport) PG_GETARG_POINTER(0);

	sortinfo->comparator = btnamefastcmp;

	PG_RETURN_VOID();
}