diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index f70cfe75e9e..7c3ef92cd2e 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -1140,8 +1140,7 @@ SELECT '52093.89'::money::numeric::float8;
advantages in some other database systems, there is no such advantage in
PostgreSQL; in fact
character(n>) is usually the slowest of
- the three because of its additional storage costs and slower
- sorting. In most situations
+ the three because of its additional storage costs. In most situations
text or character varying should be used
instead.
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 0498fef4049..94d6da5eb57 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -17,6 +17,7 @@
#include "access/hash.h"
#include "access/tuptoaster.h"
+#include "catalog/pg_collation.h"
#include "libpq/pqformat.h"
#include "nodes/nodeFuncs.h"
#include "utils/array.h"
@@ -649,14 +650,21 @@ varchartypmodout(PG_FUNCTION_ARGS)
*****************************************************************************/
/* "True" length (not counting trailing blanks) of a BpChar */
-static int
+static inline int
bcTruelen(BpChar *arg)
{
- char *s = VARDATA_ANY(arg);
- int i;
- int len;
+ return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
+}
- len = VARSIZE_ANY_EXHDR(arg);
+int
+bpchartruelen(char *s, int len)
+{
+ int i;
+
+ /*
+ * Note that we rely on the assumption that ' ' is a singleton unit on
+ * every supported multibyte server encoding.
+ */
for (i = len - 1; i >= 0; i--)
{
if (s[i] != ' ')
@@ -858,6 +866,23 @@ bpcharcmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(cmp);
}
+Datum
+bpchar_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ Oid collid = ssup->ssup_collation;
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ /* Use generic string SortSupport */
+ varstr_sortsupport(ssup, collid, true);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_VOID();
+}
+
Datum
bpchar_larger(PG_FUNCTION_ARGS)
{
@@ -926,8 +951,9 @@ hashbpchar(PG_FUNCTION_ARGS)
/*
* The following operators support character-by-character comparison
* of bpchar datums, to allow building indexes suitable for LIKE clauses.
- * Note that the regular bpchareq/bpcharne comparison operators are assumed
- * to be compatible with these!
+ * Note that the regular bpchareq/bpcharne comparison operators, and
+ * regular support functions 1 and 2 with "C" collation are assumed to be
+ * compatible with these!
*/
static int
@@ -1030,3 +1056,20 @@ btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(result);
}
+
+
+Datum
+btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ /* Use generic string SortSupport, forcing "C" collation */
+ varstr_sortsupport(ssup, C_COLLATION_OID, true);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_VOID();
+}
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 8683188df13..1a74e5e93c8 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -40,6 +40,7 @@
int bytea_output = BYTEA_OUTPUT_HEX;
typedef struct varlena unknown;
+typedef struct varlena string;
typedef struct
{
@@ -67,13 +68,14 @@ typedef struct
int last_returned; /* Last comparison result (cache) */
bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
bool collate_c;
+ bool bpchar; /* Sorting pbchar, not varchar/text/bytea? */
hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
hyperLogLogState full_card; /* Full key cardinality state */
double prop_card; /* Required cardinality proportion */
#ifdef HAVE_LOCALE_T
pg_locale_t locale;
#endif
-} TextSortSupport;
+} StringSortSupport;
/*
* This should be large enough that most strings will fit, but small enough
@@ -87,12 +89,15 @@ typedef struct
#define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
-static void btsortsupport_worker(SortSupport ssup, Oid collid);
-static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup);
-static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup);
-static int bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup);
-static Datum bttext_abbrev_convert(Datum original, SortSupport ssup);
-static bool bttext_abbrev_abort(int memtupcount, SortSupport ssup);
+#define DatumGetStringP(X) ((string *) PG_DETOAST_DATUM(X))
+#define DatumGetStringPP(X) ((string *) PG_DETOAST_DATUM_PACKED(X))
+
+static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
+static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
+static int varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup);
+static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
+static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
+static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
static int32 text_length(Datum str);
static text *text_catenate(text *t1, text *t2);
static text *text_substring(Datum str,
@@ -1738,19 +1743,30 @@ bttextsortsupport(PG_FUNCTION_ARGS)
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
- btsortsupport_worker(ssup, collid);
+ /* Use generic string SortSupport */
+ varstr_sortsupport(ssup, collid, false);
MemoryContextSwitchTo(oldcontext);
PG_RETURN_VOID();
}
-static void
-btsortsupport_worker(SortSupport ssup, Oid collid)
+/*
+ * Generic sortsupport interface for character type's operator classes.
+ * Includes locale support, and support for BpChar semantics (i.e. removing
+ * trailing spaces before comparison).
+ *
+ * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
+ * same representation. Callers that always use the C collation (e.g.
+ * non-collatable type callers like bytea) may have NUL bytes in their strings;
+ * this will not work with any other collation, though.
+ */
+void
+varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
{
bool abbreviate = ssup->abbreviate;
bool collate_c = false;
- TextSortSupport *tss;
+ StringSortSupport *sss;
#ifdef HAVE_LOCALE_T
pg_locale_t locale = 0;
@@ -1762,20 +1778,25 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
* overhead of a trip through the fmgr layer for every comparison, which
* can be substantial.
*
- * Most typically, we'll set the comparator to bttextfastcmp_locale, which
- * uses strcoll() to perform comparisons. However, if LC_COLLATE = C, we
- * can make things quite a bit faster with bttextfastcmp_c, which uses
- * memcmp() rather than strcoll().
+ * Most typically, we'll set the comparator to varstrfastcmp_locale, which
+ * uses strcoll() to perform comparisons and knows about the special
+ * requirements of BpChar callers. However, if LC_COLLATE = C, we can make
+ * things quite a bit faster with varstrfastcmp_c or bpcharfastcmp_c,
+ * both of which use memcmp() rather than strcoll().
*
* There is a further exception on Windows. When the database encoding is
* UTF-8 and we are not using the C collation, complex hacks are required.
* We don't currently have a comparator that handles that case, so we fall
- * back on the slow method of having the sort code invoke bttextcmp() via
- * the fmgr trampoline.
+ * back on the slow method of having the sort code invoke bttextcmp() (in
+ * the case of text) via the fmgr trampoline.
*/
if (lc_collate_is_c(collid))
{
- ssup->comparator = bttextfastcmp_c;
+ if (!bpchar)
+ ssup->comparator = varstrfastcmp_c;
+ else
+ ssup->comparator = bpcharfastcmp_c;
+
collate_c = true;
}
#ifdef WIN32
@@ -1784,7 +1805,7 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
#endif
else
{
- ssup->comparator = bttextfastcmp_locale;
+ ssup->comparator = varstrfastcmp_locale;
/*
* We need a collation-sensitive comparison. To make things faster,
@@ -1825,24 +1846,25 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
/*
* If we're using abbreviated keys, or if we're using a locale-aware
- * comparison, we need to initialize a TextSortSupport object. Both cases
- * will make use of the temporary buffers we initialize here for scratch
- * space, and the abbreviation case requires additional state.
+ * comparison, we need to initialize a StringSortSupport object. Both
+ * cases will make use of the temporary buffers we initialize here for
+ * scratch space (and to detect requirement for BpChar semantics from
+ * caller), and the abbreviation case requires additional state.
*/
if (abbreviate || !collate_c)
{
- tss = palloc(sizeof(TextSortSupport));
- tss->buf1 = palloc(TEXTBUFLEN);
- tss->buflen1 = TEXTBUFLEN;
- tss->buf2 = palloc(TEXTBUFLEN);
- tss->buflen2 = TEXTBUFLEN;
+ sss = palloc(sizeof(StringSortSupport));
+ sss->buf1 = palloc(TEXTBUFLEN);
+ sss->buflen1 = TEXTBUFLEN;
+ sss->buf2 = palloc(TEXTBUFLEN);
+ sss->buflen2 = TEXTBUFLEN;
/* Start with invalid values */
- tss->last_len1 = -1;
- tss->last_len2 = -1;
+ sss->last_len1 = -1;
+ sss->last_len2 = -1;
/* Initialize */
- tss->last_returned = 0;
+ sss->last_returned = 0;
#ifdef HAVE_LOCALE_T
- tss->locale = locale;
+ sss->locale = locale;
#endif
/*
* To avoid somehow confusing a strxfrm() blob and an original string,
@@ -1858,9 +1880,10 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
*
* Arbitrarily initialize cache_blob to true.
*/
- tss->cache_blob = true;
- tss->collate_c = collate_c;
- ssup->ssup_extra = tss;
+ sss->cache_blob = true;
+ sss->collate_c = collate_c;
+ sss->bpchar = bpchar;
+ ssup->ssup_extra = sss;
/*
* If possible, plan to use the abbreviated keys optimization. The
@@ -1869,13 +1892,13 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
*/
if (abbreviate)
{
- tss->prop_card = 0.20;
- initHyperLogLog(&tss->abbr_card, 10);
- initHyperLogLog(&tss->full_card, 10);
+ sss->prop_card = 0.20;
+ initHyperLogLog(&sss->abbr_card, 10);
+ initHyperLogLog(&sss->full_card, 10);
ssup->abbrev_full_comparator = ssup->comparator;
- ssup->comparator = bttextcmp_abbrev;
- ssup->abbrev_converter = bttext_abbrev_convert;
- ssup->abbrev_abort = bttext_abbrev_abort;
+ ssup->comparator = varstrcmp_abbrev;
+ ssup->abbrev_converter = varstr_abbrev_convert;
+ ssup->abbrev_abort = varstr_abbrev_abort;
}
}
}
@@ -1884,10 +1907,10 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
* sortsupport comparison func (for C locale case)
*/
static int
-bttextfastcmp_c(Datum x, Datum y, SortSupport ssup)
+varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
{
- text *arg1 = DatumGetTextPP(x);
- text *arg2 = DatumGetTextPP(y);
+ string *arg1 = DatumGetStringPP(x);
+ string *arg2 = DatumGetStringPP(y);
char *a1p,
*a2p;
int len1,
@@ -1913,16 +1936,53 @@ bttextfastcmp_c(Datum x, Datum y, SortSupport ssup)
return result;
}
+/*
+ * sortsupport comparison func (for BpChar C locale case)
+ *
+ * BpChar outsources its sortsupport to this module. Specialization for the
+ * varstr_sortsupport BpChar case, modeled on
+ * internal_bpchar_pattern_compare().
+ */
+static int
+bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
+{
+ BpChar *arg1 = DatumGetBpCharPP(x);
+ BpChar *arg2 = DatumGetBpCharPP(y);
+ char *a1p,
+ *a2p;
+ int len1,
+ len2,
+ result;
+
+ a1p = VARDATA_ANY(arg1);
+ a2p = VARDATA_ANY(arg2);
+
+ len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
+ len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
+
+ result = memcmp(a1p, a2p, Min(len1, len2));
+ if ((result == 0) && (len1 != len2))
+ result = (len1 < len2) ? -1 : 1;
+
+ /* We can't afford to leak memory here. */
+ if (PointerGetDatum(arg1) != x)
+ pfree(arg1);
+ if (PointerGetDatum(arg2) != y)
+ pfree(arg2);
+
+ return result;
+}
+
/*
* sortsupport comparison func (for locale case)
*/
static int
-bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
+varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
{
- text *arg1 = DatumGetTextPP(x);
- text *arg2 = DatumGetTextPP(y);
+ string *arg1 = DatumGetStringPP(x);
+ string *arg2 = DatumGetStringPP(y);
bool arg1_match;
- TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
+ StringSortSupport *sss = (StringSortSupport *) ssup->ssup_extra;
/* working state */
char *a1p,
@@ -1944,41 +2004,56 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
* No change in buf1 or buf2 contents, so avoid changing last_len1 or
* last_len2. Existing contents of buffers might still be used by next
* call.
+ *
+ * It's fine to allow the comparison of BpChar padding bytes here, even
+ * though that implies that the memcmp() will usually be performed for
+ * BpChar callers (though multibyte characters could still prevent that
+ * from occurring). The memcmp() is still very cheap, and BpChar's
+ * funny semantics have us remove trailing spaces (not limited to
+ * padding), so we need make no distinction between padding space
+ * characters and "real" space characters.
*/
result = 0;
goto done;
}
- if (len1 >= tss->buflen1)
+ if (sss->bpchar)
{
- pfree(tss->buf1);
- tss->buflen1 = Max(len1 + 1, Min(tss->buflen1 * 2, MaxAllocSize));
- tss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen1);
+ /* Get true number of bytes, ignoring trailing spaces */
+ len1 = bpchartruelen(a1p, len1);
+ len2 = bpchartruelen(a2p, len2);
}
- if (len2 >= tss->buflen2)
+
+ if (len1 >= sss->buflen1)
{
- pfree(tss->buf2);
- tss->buflen2 = Max(len2 + 1, Min(tss->buflen2 * 2, MaxAllocSize));
- tss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen2);
+ pfree(sss->buf1);
+ sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
+ sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
+ }
+ if (len2 >= sss->buflen2)
+ {
+ pfree(sss->buf2);
+ sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
+ sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
}
/*
* We're likely to be asked to compare the same strings repeatedly, and
* memcmp() is so much cheaper than strcoll() that it pays to try to cache
* comparisons, even though in general there is no reason to think that
- * that will work out (every text datum may be unique). Caching does not
+ * that will work out (every string datum may be unique). Caching does not
* slow things down measurably when it doesn't work out, and can speed
* things up by rather a lot when it does. In part, this is because the
* memcmp() compares data from cachelines that are needed in L1 cache even
* when the last comparison's result cannot be reused.
*/
arg1_match = true;
- if (len1 != tss->last_len1 || memcmp(tss->buf1, a1p, len1) != 0)
+ if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
{
arg1_match = false;
- memcpy(tss->buf1, a1p, len1);
- tss->buf1[len1] = '\0';
- tss->last_len1 = len1;
+ memcpy(sss->buf1, a1p, len1);
+ sss->buf1[len1] = '\0';
+ sss->last_len1 = len1;
}
/*
@@ -1987,25 +2062,25 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
* it seems (at least with moderate to low cardinality sets), because
* quicksort compares the same pivot against many values.
*/
- if (len2 != tss->last_len2 || memcmp(tss->buf2, a2p, len2) != 0)
+ if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
{
- memcpy(tss->buf2, a2p, len2);
- tss->buf2[len2] = '\0';
- tss->last_len2 = len2;
+ memcpy(sss->buf2, a2p, len2);
+ sss->buf2[len2] = '\0';
+ sss->last_len2 = len2;
}
- else if (arg1_match && !tss->cache_blob)
+ else if (arg1_match && !sss->cache_blob)
{
/* Use result cached following last actual strcoll() call */
- result = tss->last_returned;
+ result = sss->last_returned;
goto done;
}
#ifdef HAVE_LOCALE_T
- if (tss->locale)
- result = strcoll_l(tss->buf1, tss->buf2, tss->locale);
+ if (sss->locale)
+ result = strcoll_l(sss->buf1, sss->buf2, sss->locale);
else
#endif
- result = strcoll(tss->buf1, tss->buf2);
+ result = strcoll(sss->buf1, sss->buf2);
/*
* In some locales strcoll() can claim that nonidentical strings are
@@ -2013,11 +2088,11 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
* follow Perl's lead and sort "equal" strings according to strcmp().
*/
if (result == 0)
- result = strcmp(tss->buf1, tss->buf2);
+ result = strcmp(sss->buf1, sss->buf2);
/* Cache result, perhaps saving an expensive strcoll() call next time */
- tss->cache_blob = false;
- tss->last_returned = result;
+ sss->cache_blob = false;
+ sss->last_returned = result;
done:
/* We can't afford to leak memory here. */
if (PointerGetDatum(arg1) != x)
@@ -2032,13 +2107,14 @@ done:
* Abbreviated key comparison func
*/
static int
-bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup)
+varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
{
/*
- * When 0 is returned, the core system will call bttextfastcmp_c() or
- * bttextfastcmp_locale(). Even a strcmp() on two non-truncated strxfrm()
- * blobs cannot indicate *equality* authoritatively, for the same reason
- * that there is a strcoll() tie-breaker call to strcmp() in varstr_cmp().
+ * When 0 is returned, the core system will call varstrfastcmp_c()
+ * (bpcharfastcmp_c() in BpChar case) or varstrfastcmp_locale(). Even a
+ * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
+ * authoritatively, for the same reason that there is a strcoll()
+ * tie-breaker call to strcmp() in varstr_cmp().
*/
if (x > y)
return 1;
@@ -2049,16 +2125,17 @@ bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup)
}
/*
- * Conversion routine for sortsupport. Converts original text to abbreviated
- * key representation. Our encoding strategy is simple -- pack the first 8
- * bytes of a strxfrm() blob into a Datum (on little-endian machines, the 8
- * bytes are stored in reverse order), and treat it as an unsigned integer.
+ * Conversion routine for sortsupport. Converts original to abbreviated key
+ * representation. Our encoding strategy is simple -- pack the first 8 bytes
+ * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
+ * stored in reverse order), and treat it as an unsigned integer. When the "C"
+ * locale is used, or in case of bytea, just memcpy() from original instead.
*/
static Datum
-bttext_abbrev_convert(Datum original, SortSupport ssup)
+varstr_abbrev_convert(Datum original, SortSupport ssup)
{
- TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
- text *authoritative = DatumGetTextPP(original);
+ StringSortSupport *sss = (StringSortSupport *) ssup->ssup_extra;
+ string *authoritative = DatumGetStringPP(original);
char *authoritative_data = VARDATA_ANY(authoritative);
/* working state */
@@ -2072,13 +2149,38 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
memset(pres, 0, sizeof(Datum));
len = VARSIZE_ANY_EXHDR(authoritative);
+ /* Get number of bytes, ignoring trailing spaces */
+ if (sss->bpchar)
+ len = bpchartruelen(authoritative_data, len);
+
/*
* If we're using the C collation, use memcmp(), rather than strxfrm(), to
* abbreviate keys. The full comparator for the C locale is always
- * memcmp(), and we can't risk having this give a different answer.
- * Besides, this should be faster, too.
+ * memcmp(). It would be incorrect to allow bytea callers (callers that
+ * always force the C collation -- bytea isn't a collatable type, but this
+ * approach is convenient) to use strxfrm(). This is because bytea strings
+ * may contain NUL bytes. Besides, this should be faster, too.
+ *
+ * More generally, it's okay that bytea callers can have NUL bytes in
+ * strings because varstrcmp_abbrev() need not make a distinction between
+ * terminating NUL bytes, and NUL bytes representing actual NULs in the
+ * authoritative representation. Hopefully a comparison at or past one
+ * abbreviated key's terminating NUL byte will resolve the comparison
+ * without consulting the authoritative representation; specifically, some
+ * later non-NUL byte in the longer string can resolve the comparison
+ * against a subsequent terminating NUL in the shorter string. There will
+ * usually be what is effectively a "length-wise" resolution there and
+ * then.
+ *
+ * If that doesn't work out -- if all bytes in the longer string positioned
+ * at or past the offset of the smaller string's (first) terminating NUL
+ * are actually representative of NUL bytes in the authoritative binary
+ * string (perhaps with some *terminating* NUL bytes towards the end of the
+ * longer string iff it happens to still be small) -- then an authoritative
+ * tie-breaker will happen, and do the right thing: explicitly consider
+ * string length.
*/
- if (tss->collate_c)
+ if (sss->collate_c)
memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
else
{
@@ -2088,50 +2190,50 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
* We're not using the C collation, so fall back on strxfrm.
*/
- /* By convention, we use buffer 1 to store and NUL-terminate text */
- if (len >= tss->buflen1)
+ /* By convention, we use buffer 1 to store and NUL-terminate */
+ if (len >= sss->buflen1)
{
- pfree(tss->buf1);
- tss->buflen1 = Max(len + 1, Min(tss->buflen1 * 2, MaxAllocSize));
- tss->buf1 = palloc(tss->buflen1);
+ pfree(sss->buf1);
+ sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
+ sss->buf1 = palloc(sss->buflen1);
}
/* Might be able to reuse strxfrm() blob from last call */
- if (tss->last_len1 == len && tss->cache_blob &&
- memcmp(tss->buf1, authoritative_data, len) == 0)
+ if (sss->last_len1 == len && sss->cache_blob &&
+ memcmp(sss->buf1, authoritative_data, len) == 0)
{
- memcpy(pres, tss->buf2, Min(sizeof(Datum), tss->last_len2));
+ memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
/* No change affecting cardinality, so no hashing required */
goto done;
}
/* Just like strcoll(), strxfrm() expects a NUL-terminated string */
- memcpy(tss->buf1, authoritative_data, len);
- tss->buf1[len] = '\0';
- tss->last_len1 = len;
+ memcpy(sss->buf1, authoritative_data, len);
+ sss->buf1[len] = '\0';
+ sss->last_len1 = len;
for (;;)
{
#ifdef HAVE_LOCALE_T
- if (tss->locale)
- bsize = strxfrm_l(tss->buf2, tss->buf1,
- tss->buflen2, tss->locale);
+ if (sss->locale)
+ bsize = strxfrm_l(sss->buf2, sss->buf1,
+ sss->buflen2, sss->locale);
else
#endif
- bsize = strxfrm(tss->buf2, tss->buf1, tss->buflen2);
+ bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
- tss->last_len2 = bsize;
- if (bsize < tss->buflen2)
+ sss->last_len2 = bsize;
+ if (bsize < sss->buflen2)
break;
/*
* The C standard states that the contents of the buffer is now
* unspecified. Grow buffer, and retry.
*/
- pfree(tss->buf2);
- tss->buflen2 = Max(bsize + 1,
- Min(tss->buflen2 * 2, MaxAllocSize));
- tss->buf2 = palloc(tss->buflen2);
+ pfree(sss->buf2);
+ sss->buflen2 = Max(bsize + 1,
+ Min(sss->buflen2 * 2, MaxAllocSize));
+ sss->buf2 = palloc(sss->buflen2);
}
/*
@@ -2139,8 +2241,11 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
* strxfrm() blob is itself NUL terminated, leaving no danger of
* misinterpreting any NUL bytes not intended to be interpreted as
* logically representing termination.
+ *
+ * (Actually, even if there were NUL bytes in the blob it would be
+ * okay. See remarks on bytea case above.)
*/
- memcpy(pres, tss->buf2, Min(sizeof(Datum), bsize));
+ memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
}
/*
@@ -2148,7 +2253,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
* authoritative keys using HyperLogLog. Used as cheap insurance against
* the worst case, where we do many string transformations for no saving
* in full strcoll()-based comparisons. These statistics are used by
- * bttext_abbrev_abort().
+ * varstr_abbrev_abort().
*
* First, Hash key proper, or a significant fraction of it. Mix in length
* in order to compensate for cases where differences are past
@@ -2160,7 +2265,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
if (len > PG_CACHE_LINE_SIZE)
hash ^= DatumGetUInt32(hash_uint32((uint32) len));
- addHyperLogLog(&tss->full_card, hash);
+ addHyperLogLog(&sss->full_card, hash);
/* Hash abbreviated key */
#if SIZEOF_DATUM == 8
@@ -2176,15 +2281,15 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
hash = DatumGetUInt32(hash_uint32((uint32) res));
#endif
- addHyperLogLog(&tss->abbr_card, hash);
+ addHyperLogLog(&sss->abbr_card, hash);
/* Cache result, perhaps saving an expensive strxfrm() call next time */
- tss->cache_blob = true;
+ sss->cache_blob = true;
done:
/*
* Byteswap on little-endian machines.
*
- * This is needed so that bttextcmp_abbrev() (an unsigned integer 3-way
+ * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
* comparator) works correctly on all platforms. If we didn't do this,
* the comparator would have to call memcmp() with a pair of pointers to
* the first byte of each abbreviated key, which is slower.
@@ -2204,9 +2309,9 @@ done:
* should be aborted, based on its projected effectiveness.
*/
static bool
-bttext_abbrev_abort(int memtupcount, SortSupport ssup)
+varstr_abbrev_abort(int memtupcount, SortSupport ssup)
{
- TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
+ StringSortSupport *sss = (StringSortSupport *) ssup->ssup_extra;
double abbrev_distinct,
key_distinct;
@@ -2216,8 +2321,8 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
if (memtupcount < 100)
return false;
- abbrev_distinct = estimateHyperLogLog(&tss->abbr_card);
- key_distinct = estimateHyperLogLog(&tss->full_card);
+ abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
+ key_distinct = estimateHyperLogLog(&sss->full_card);
/*
* Clamp cardinality estimates to at least one distinct value. While
@@ -2240,10 +2345,10 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
{
double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
- elog(LOG, "bttext_abbrev: abbrev_distinct after %d: %f "
+ elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
"(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
- tss->prop_card);
+ sss->prop_card);
}
#endif
@@ -2263,7 +2368,7 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
* abbreviated comparison with a cheap memcmp()-based authoritative
* resolution are equivalent.
*/
- if (abbrev_distinct > key_distinct * tss->prop_card)
+ if (abbrev_distinct > key_distinct * sss->prop_card)
{
/*
* When we have exceeded 10,000 tuples, decay required cardinality
@@ -2291,7 +2396,7 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
* apparent it's probably not worth aborting.
*/
if (memtupcount > 10000)
- tss->prop_card *= 0.65;
+ sss->prop_card *= 0.65;
return false;
}
@@ -2309,9 +2414,9 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
*/
#ifdef TRACE_SORT
if (trace_sort)
- elog(LOG, "bttext_abbrev: aborted abbreviation at %d "
+ elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
"(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
- memtupcount, abbrev_distinct, key_distinct, tss->prop_card);
+ memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
#endif
return true;
@@ -2345,8 +2450,9 @@ text_smaller(PG_FUNCTION_ARGS)
/*
* The following operators support character-by-character comparison
* of text datums, to allow building indexes suitable for LIKE clauses.
- * Note that the regular texteq/textne comparison operators are assumed
- * to be compatible with these!
+ * Note that the regular texteq/textne comparison operators, and regular
+ * support functions 1 and 2 with "C" collation are assumed to be
+ * compatible with these!
*/
static int
@@ -2451,6 +2557,23 @@ bttext_pattern_cmp(PG_FUNCTION_ARGS)
}
+Datum
+bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ /* Use generic string SortSupport, forcing "C" collation */
+ varstr_sortsupport(ssup, C_COLLATION_OID, false);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_VOID();
+}
+
+
/*-------------------------------------------------------------
* byteaoctetlen
*
@@ -3375,6 +3498,22 @@ byteacmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(cmp);
}
+Datum
+bytea_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ /* Use generic string SortSupport, forcing "C" collation */
+ varstr_sortsupport(ssup, C_COLLATION_OID, false);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_VOID();
+}
+
/*
* appendStringInfoText
*
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 5c480b7d3ab..568c98f94e4 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201601281
+#define CATALOG_VERSION_NO 201602031
#endif
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h
index e75da76b993..f0ae0087048 100644
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -80,7 +80,9 @@ DATA(insert ( 421 702 702 1 357 ));
DATA(insert ( 423 1560 1560 1 1596 ));
DATA(insert ( 424 16 16 1 1693 ));
DATA(insert ( 426 1042 1042 1 1078 ));
+DATA(insert ( 426 1042 1042 2 3328 ));
DATA(insert ( 428 17 17 1 1954 ));
+DATA(insert ( 428 17 17 2 3331 ));
DATA(insert ( 429 18 18 1 358 ));
DATA(insert ( 434 1082 1082 1 1092 ));
DATA(insert ( 434 1082 1082 2 3136 ));
@@ -128,7 +130,9 @@ DATA(insert ( 1996 1083 1083 1 1107 ));
DATA(insert ( 2000 1266 1266 1 1358 ));
DATA(insert ( 2002 1562 1562 1 1672 ));
DATA(insert ( 2095 25 25 1 2166 ));
+DATA(insert ( 2095 25 25 2 3332 ));
DATA(insert ( 2097 1042 1042 1 2180 ));
+DATA(insert ( 2097 1042 1042 2 3333 ));
DATA(insert ( 2099 790 790 1 377 ));
DATA(insert ( 2233 703 703 1 380 ));
DATA(insert ( 2234 704 704 1 381 ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index a2248b4679f..5ded13e2b01 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -1130,6 +1130,8 @@ DATA(insert OID = 1064 ( bpchar_smaller PGNSP PGUID 12 1 0 0 0 f f f f t f i
DESCR("smaller of two");
DATA(insert OID = 1078 ( bpcharcmp PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 23 "1042 1042" _null_ _null_ _null_ _null_ _null_ bpcharcmp _null_ _null_ _null_ ));
DESCR("less-equal-greater");
+DATA(insert OID = 3328 ( bpchar_sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ bpchar_sortsupport _null_ _null_ _null_ ));
+DESCR("sort support");
DATA(insert OID = 1080 ( hashbpchar PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 23 "1042" _null_ _null_ _null_ _null_ _null_ hashbpchar _null_ _null_ _null_ ));
DESCR("hash");
DATA(insert OID = 1081 ( format_type PGNSP PGUID 12 1 0 0 0 f f f f f f s s 2 0 25 "26 23" _null_ _null_ _null_ _null_ _null_ format_type _null_ _null_ _null_ ));
@@ -2861,6 +2863,8 @@ DATA(insert OID = 1952 ( byteage PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0
DATA(insert OID = 1953 ( byteane PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "17 17" _null_ _null_ _null_ _null_ _null_ byteane _null_ _null_ _null_ ));
DATA(insert OID = 1954 ( byteacmp PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 23 "17 17" _null_ _null_ _null_ _null_ _null_ byteacmp _null_ _null_ _null_ ));
DESCR("less-equal-greater");
+DATA(insert OID = 3331 ( bytea_sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ bytea_sortsupport _null_ _null_ _null_ ));
+DESCR("sort support");
DATA(insert OID = 3917 ( timestamp_transform PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2281 "2281" _null_ _null_ _null_ _null_ _null_ timestamp_transform _null_ _null_ _null_ ));
DESCR("transform a timestamp length coercion");
@@ -3373,6 +3377,8 @@ DATA(insert OID = 2163 ( text_pattern_ge PGNSP PGUID 12 1 0 0 0 f f f f t f i s
DATA(insert OID = 2164 ( text_pattern_gt PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ text_pattern_gt _null_ _null_ _null_ ));
DATA(insert OID = 2166 ( bttext_pattern_cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 23 "25 25" _null_ _null_ _null_ _null_ _null_ bttext_pattern_cmp _null_ _null_ _null_ ));
DESCR("less-equal-greater");
+DATA(insert OID = 3332 ( bttext_pattern_sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ bttext_pattern_sortsupport _null_ _null_ _null_ ));
+DESCR("sort support");
DATA(insert OID = 2174 ( bpchar_pattern_lt PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "1042 1042" _null_ _null_ _null_ _null_ _null_ bpchar_pattern_lt _null_ _null_ _null_ ));
DATA(insert OID = 2175 ( bpchar_pattern_le PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "1042 1042" _null_ _null_ _null_ _null_ _null_ bpchar_pattern_le _null_ _null_ _null_ ));
@@ -3380,6 +3386,8 @@ DATA(insert OID = 2177 ( bpchar_pattern_ge PGNSP PGUID 12 1 0 0 0 f f f f t f
DATA(insert OID = 2178 ( bpchar_pattern_gt PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "1042 1042" _null_ _null_ _null_ _null_ _null_ bpchar_pattern_gt _null_ _null_ _null_ ));
DATA(insert OID = 2180 ( btbpchar_pattern_cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 23 "1042 1042" _null_ _null_ _null_ _null_ _null_ btbpchar_pattern_cmp _null_ _null_ _null_ ));
DESCR("less-equal-greater");
+DATA(insert OID = 3333 ( btbpchar_pattern_sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ btbpchar_pattern_sortsupport _null_ _null_ _null_ ));
+DESCR("sort support");
DATA(insert OID = 2188 ( btint48cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 23 "23 20" _null_ _null_ _null_ _null_ _null_ btint48cmp _null_ _null_ _null_ ));
DESCR("less-equal-greater");
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index c2e529fc6f6..5e8e8329b82 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -16,6 +16,7 @@
#include "fmgr.h"
#include "nodes/parsenodes.h"
+#include "utils/sortsupport.h"
/*
* Defined in adt/
@@ -761,8 +762,10 @@ extern Datum bpcharle(PG_FUNCTION_ARGS);
extern Datum bpchargt(PG_FUNCTION_ARGS);
extern Datum bpcharge(PG_FUNCTION_ARGS);
extern Datum bpcharcmp(PG_FUNCTION_ARGS);
+extern Datum bpchar_sortsupport(PG_FUNCTION_ARGS);
extern Datum bpchar_larger(PG_FUNCTION_ARGS);
extern Datum bpchar_smaller(PG_FUNCTION_ARGS);
+extern int bpchartruelen(char *s, int len);
extern Datum bpcharlen(PG_FUNCTION_ARGS);
extern Datum bpcharoctetlen(PG_FUNCTION_ARGS);
extern Datum hashbpchar(PG_FUNCTION_ARGS);
@@ -771,6 +774,7 @@ extern Datum bpchar_pattern_le(PG_FUNCTION_ARGS);
extern Datum bpchar_pattern_gt(PG_FUNCTION_ARGS);
extern Datum bpchar_pattern_ge(PG_FUNCTION_ARGS);
extern Datum btbpchar_pattern_cmp(PG_FUNCTION_ARGS);
+extern Datum btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS);
extern Datum varcharin(PG_FUNCTION_ARGS);
extern Datum varcharout(PG_FUNCTION_ARGS);
@@ -808,6 +812,7 @@ extern Datum text_pattern_le(PG_FUNCTION_ARGS);
extern Datum text_pattern_gt(PG_FUNCTION_ARGS);
extern Datum text_pattern_ge(PG_FUNCTION_ARGS);
extern Datum bttext_pattern_cmp(PG_FUNCTION_ARGS);
+extern Datum bttext_pattern_sortsupport(PG_FUNCTION_ARGS);
extern Datum textlen(PG_FUNCTION_ARGS);
extern Datum textoctetlen(PG_FUNCTION_ARGS);
extern Datum textpos(PG_FUNCTION_ARGS);
@@ -818,6 +823,7 @@ extern Datum textoverlay_no_len(PG_FUNCTION_ARGS);
extern Datum name_text(PG_FUNCTION_ARGS);
extern Datum text_name(PG_FUNCTION_ARGS);
extern int varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid);
+extern void varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar);
extern int varstr_levenshtein(const char *source, int slen,
const char *target, int tlen,
int ins_c, int del_c, int sub_c,
diff --git a/src/include/utils/bytea.h b/src/include/utils/bytea.h
index 40147fbaa8b..c41e6b4b7a5 100644
--- a/src/include/utils/bytea.h
+++ b/src/include/utils/bytea.h
@@ -42,6 +42,7 @@ extern Datum byteale(PG_FUNCTION_ARGS);
extern Datum byteagt(PG_FUNCTION_ARGS);
extern Datum byteage(PG_FUNCTION_ARGS);
extern Datum byteacmp(PG_FUNCTION_ARGS);
+extern Datum bytea_sortsupport(PG_FUNCTION_ARGS);
extern Datum byteacat(PG_FUNCTION_ARGS);
extern Datum byteapos(PG_FUNCTION_ARGS);
extern Datum bytea_substr(PG_FUNCTION_ARGS);