1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Introduce a SQL-callable function array_sort(anyarray).

Create a function that will sort the elements of an array
according to the element type's sort order.  If the array
has more than one dimension, the sub-arrays of the first
dimension are sorted per normal array-comparison rules,
leaving their contents alone.

In support of this, add pg_type.typarray to the set of fields
cached by the typcache.

Author: Junwang Zhao <zhjwpku@gmail.com>
Co-authored-by: Jian He <jian.universality@gmail.com>
Reviewed-by: Aleksander Alekseev <aleksander@timescale.com>
Discussion: https://postgr.es/m/CAEG8a3J41a4dpw_-F94fF-JPRXYxw-GfsgoGotKcjs9LVfEEvw@mail.gmail.com
This commit is contained in:
Tom Lane
2025-04-01 18:03:55 -04:00
parent 6da2ba1d8a
commit 6c12ae09f5
11 changed files with 426 additions and 1 deletions

View File

@ -20741,6 +20741,42 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
<primary>array_sort</primary>
</indexterm>
<function>array_sort</function> (
<parameter>array</parameter> <type>anyarray</type>
<optional>, <parameter>descending</parameter> <type>boolean</type>
<optional>, <parameter>nulls_first</parameter> <type>boolean</type>
</optional></optional> )
<returnvalue>anyarray</returnvalue>
</para>
<para>
Sorts the first dimension of the array.
The sort order is determined by the default sort ordering of the
array's element type; however, if the element type is collatable,
the collation to use can be specified by adding
a <literal>COLLATE</literal> clause to
the <parameter>array</parameter> argument.
</para>
<para>
If <parameter>descending</parameter> is true then sort in
descending order, otherwise ascending order. If omitted, the
default is ascending order.
If <parameter>nulls_first</parameter> is true then nulls appear
before non-null values, otherwise nulls appear after non-null
values.
If omitted, <parameter>nulls_first</parameter> is taken to have
the same value as <parameter>descending</parameter>.
</para>
<para>
<literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
<returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">

View File

@ -12,16 +12,19 @@
*/
#include "postgres.h"
#include "catalog/pg_operator_d.h"
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "nodes/supportnodes.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@ -43,6 +46,18 @@ typedef struct DeserialIOData
Oid typioparam;
} DeserialIOData;
/*
* ArraySortCachedInfo
* Used for caching catalog data in array_sort
*/
typedef struct ArraySortCachedInfo
{
ArrayMetaState array_meta; /* metadata for array_create_iterator */
Oid elem_lt_opr; /* "<" operator for element type */
Oid elem_gt_opr; /* ">" operator for element type */
Oid array_type; /* pg_type OID of array type */
} ArraySortCachedInfo;
static Datum array_position_common(FunctionCallInfo fcinfo);
@ -1858,3 +1873,166 @@ array_reverse(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
/*
* array_sort
*
* Sorts the first dimension of the array.
*/
static ArrayType *
array_sort_internal(ArrayType *array, bool descending, bool nulls_first,
FunctionCallInfo fcinfo)
{
ArrayType *newarray;
Oid collation = PG_GET_COLLATION();
int ndim,
*dims,
*lbs;
ArraySortCachedInfo *cache_info;
Oid elmtyp;
Oid sort_typ;
Oid sort_opr;
Tuplesortstate *tuplesortstate;
ArrayIterator array_iterator;
Datum value;
bool isnull;
ArrayBuildStateAny *astate = NULL;
ndim = ARR_NDIM(array);
dims = ARR_DIMS(array);
lbs = ARR_LBOUND(array);
/* Quick exit if we don't need to sort */
if (ndim < 1 || dims[0] < 2)
return array;
/* Set up cache area if we didn't already */
cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
if (cache_info == NULL)
{
cache_info = (ArraySortCachedInfo *)
MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
sizeof(ArraySortCachedInfo));
fcinfo->flinfo->fn_extra = cache_info;
}
/* Fetch and cache required data if we don't have it */
elmtyp = ARR_ELEMTYPE(array);
if (elmtyp != cache_info->array_meta.element_type)
{
TypeCacheEntry *typentry;
typentry = lookup_type_cache(elmtyp,
TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
cache_info->array_meta.element_type = elmtyp;
cache_info->array_meta.typlen = typentry->typlen;
cache_info->array_meta.typbyval = typentry->typbyval;
cache_info->array_meta.typalign = typentry->typalign;
cache_info->elem_lt_opr = typentry->lt_opr;
cache_info->elem_gt_opr = typentry->gt_opr;
cache_info->array_type = typentry->typarray;
}
/* Identify the sort operator to use */
if (ndim == 1)
{
/* Need to sort the element type */
sort_typ = elmtyp;
sort_opr = (descending ? cache_info->elem_gt_opr : cache_info->elem_lt_opr);
}
else
{
/* Otherwise we're sorting arrays */
sort_typ = cache_info->array_type;
if (!OidIsValid(sort_typ))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("could not find array type for data type %s",
format_type_be(elmtyp))));
/* We know what operators to use for arrays */
sort_opr = (descending ? ARRAY_GT_OP : ARRAY_LT_OP);
}
/*
* Fail if we don't know how to sort. The error message is chosen to
* match what array_lt()/array_gt() will say in the multidimensional case.
*/
if (!OidIsValid(sort_opr))
ereport(ERROR,
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not identify a comparison function for type %s",
format_type_be(elmtyp)));
/* Put the things to be sorted (elements or sub-arrays) into a tuplesort */
tuplesortstate = tuplesort_begin_datum(sort_typ,
sort_opr,
collation,
nulls_first,
work_mem,
NULL,
TUPLESORT_NONE);
array_iterator = array_create_iterator(array, ndim - 1,
&cache_info->array_meta);
while (array_iterate(array_iterator, &value, &isnull))
{
tuplesort_putdatum(tuplesortstate, value, isnull);
}
array_free_iterator(array_iterator);
/* Do the sort */
tuplesort_performsort(tuplesortstate);
/* Extract results into a new array */
while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
{
astate = accumArrayResultAny(astate, value, isnull,
sort_typ, CurrentMemoryContext);
}
tuplesort_end(tuplesortstate);
newarray = DatumGetArrayTypeP(makeArrayResultAny(astate,
CurrentMemoryContext,
true));
/* Adjust lower bound to match the input */
ARR_LBOUND(newarray)[0] = lbs[0];
return newarray;
}
Datum
array_sort(PG_FUNCTION_ARGS)
{
ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
PG_RETURN_ARRAYTYPE_P(array_sort_internal(array,
false,
false,
fcinfo));
}
Datum
array_sort_order(PG_FUNCTION_ARGS)
{
ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
bool descending = PG_GETARG_BOOL(1);
PG_RETURN_ARRAYTYPE_P(array_sort_internal(array,
descending,
descending,
fcinfo));
}
Datum
array_sort_order_nulls_first(PG_FUNCTION_ARGS)
{
ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
bool descending = PG_GETARG_BOOL(1);
bool nulls_first = PG_GETARG_BOOL(2);
PG_RETURN_ARRAYTYPE_P(array_sort_internal(array,
descending,
nulls_first,
fcinfo));
}

View File

@ -499,6 +499,7 @@ lookup_type_cache(Oid type_id, int flags)
typentry->typrelid = typtup->typrelid;
typentry->typsubscript = typtup->typsubscript;
typentry->typelem = typtup->typelem;
typentry->typarray = typtup->typarray;
typentry->typcollation = typtup->typcollation;
typentry->flags |= TCFLAGS_HAVE_PG_TYPE_DATA;
@ -544,6 +545,7 @@ lookup_type_cache(Oid type_id, int flags)
typentry->typrelid = typtup->typrelid;
typentry->typsubscript = typtup->typsubscript;
typentry->typelem = typtup->typelem;
typentry->typarray = typtup->typarray;
typentry->typcollation = typtup->typcollation;
typentry->flags |= TCFLAGS_HAVE_PG_TYPE_DATA;

View File

@ -57,6 +57,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202504011
#define CATALOG_VERSION_NO 202504012
#endif

View File

@ -1772,6 +1772,18 @@
{ oid => '8686', descr => 'reverse array',
proname => 'array_reverse', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_reverse' },
{ oid => '8810', descr => 'sort array',
proname => 'array_sort', prorettype => 'anyarray', proargtypes => 'anyarray',
prosrc => 'array_sort' },
{ oid => '8811', descr => 'sort array',
proname => 'array_sort', prorettype => 'anyarray',
proargtypes => 'anyarray bool', proargnames => '{array,descending}',
prosrc => 'array_sort_order' },
{ oid => '8812', descr => 'sort array',
proname => 'array_sort', prorettype => 'anyarray',
proargtypes => 'anyarray bool bool',
proargnames => '{array,descending,nulls_first}',
prosrc => 'array_sort_order_nulls_first' },
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },

View File

@ -44,6 +44,7 @@ typedef struct TypeCacheEntry
Oid typrelid;
Oid typsubscript;
Oid typelem;
Oid typarray;
Oid typcollation;
/*

View File

@ -2860,3 +2860,145 @@ SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
{{7,8},{5,6},{3,4},{1,2}}
(1 row)
-- array_sort
SELECT array_sort('{}'::int[]);
array_sort
------------
{}
(1 row)
SELECT array_sort('{1}'::int[]);
array_sort
------------
{1}
(1 row)
SELECT array_sort('{1,3,5,2,4,6}'::int[]);
array_sort
---------------
{1,2,3,4,5,6}
(1 row)
SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
array_sort
---------------------------
{1.1,2.2,3.3,4.4,5.5,6.6}
(1 row)
SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
array_sort
-----------------------
{Abc,CCC,bar,bbc,foo}
(1 row)
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
array_sort
----------------------------
{Abc,CCC,bar,bbc,foo,NULL}
(1 row)
SELECT array_sort(ARRAY(SELECT '1 4'::int2vector UNION ALL SELECT '1 2'::int2vector));
array_sort
---------------
{"1 2","1 4"}
(1 row)
-- array_sort with order specified
SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
array_sort
--------------------------------
{NULL,6.6,5.5,4.4,3.3,2.2,1.1}
(1 row)
SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
array_sort
--------------------------------
{1.1,2.2,3.3,4.4,5.5,6.6,NULL}
(1 row)
-- array_sort with order and nullsfirst flag specified
SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
array_sort
--------------------------------
{NULL,6.6,5.5,4.4,3.3,2.2,1.1}
(1 row)
SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
array_sort
--------------------------------
{6.6,5.5,4.4,3.3,2.2,1.1,NULL}
(1 row)
SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
array_sort
--------------------------------
{NULL,1.1,2.2,3.3,4.4,5.5,6.6}
(1 row)
SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
array_sort
--------------------------------
{1.1,2.2,3.3,4.4,5.5,6.6,NULL}
(1 row)
-- multidimensional array tests
SELECT array_sort('{{1}}'::int[]);
array_sort
------------
{{1}}
(1 row)
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
array_sort
---------------------
{{2,1},{2,4},{6,5}}
(1 row)
SELECT array_sort('{{"1 2","3 4"}, {"1 -2","-1 4"}}'::int2vector[]);
array_sort
---------------------------------
{{"1 -2","-1 4"},{"1 2","3 4"}}
(1 row)
-- no ordering operator tests
SELECT array_sort('{1}'::xid[]); -- no error because no sort is required
array_sort
------------
{1}
(1 row)
SELECT array_sort('{1,2,3}'::xid[]);
ERROR: could not identify a comparison function for type xid
SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
ERROR: could not identify a comparison function for type xid
-- bounds preservation tests
SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
array_sort
--------------------------------------
[10:12][20:21]={{1,2},{3,4},{10,20}}
(1 row)
SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
array_sort
--------------
[-1:0]={1,7}
(1 row)
SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
array_sort
--------------------------------------
[-2:0][20:21]={{1,-4},{1,2},{10,20}}
(1 row)
SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
array_sort
------------------
{{1,-4},{10,20}}
(1 row)
SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
array_sort
------------
{{1},{10}}
(1 row)

View File

@ -1471,6 +1471,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
-- tests with array_sort
SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
array_sort
------------
{a,B}
(1 row)
SELECT array_sort('{a,B}'::text[] COLLATE "C");
array_sort
------------
{B,a}
(1 row)
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;

View File

@ -856,3 +856,39 @@ SELECT array_reverse('{1}'::int[]);
SELECT array_reverse('{1,2}'::int[]);
SELECT array_reverse('{1,2,3,NULL,4,5,6}'::int[]);
SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
-- array_sort
SELECT array_sort('{}'::int[]);
SELECT array_sort('{1}'::int[]);
SELECT array_sort('{1,3,5,2,4,6}'::int[]);
SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
SELECT array_sort(ARRAY(SELECT '1 4'::int2vector UNION ALL SELECT '1 2'::int2vector));
-- array_sort with order specified
SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
-- array_sort with order and nullsfirst flag specified
SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
-- multidimensional array tests
SELECT array_sort('{{1}}'::int[]);
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
SELECT array_sort('{{"1 2","3 4"}, {"1 -2","-1 4"}}'::int2vector[]);
-- no ordering operator tests
SELECT array_sort('{1}'::xid[]); -- no error because no sort is required
SELECT array_sort('{1,2,3}'::xid[]);
SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
-- bounds preservation tests
SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);

View File

@ -564,6 +564,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
-- tests with array_sort
SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
SELECT array_sort('{a,B}'::text[] COLLATE "C");
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;

View File

@ -154,6 +154,7 @@ ArrayIOData
ArrayIterator
ArrayMapState
ArrayMetaState
ArraySortCachedInfo
ArraySubWorkspace
ArrayToken
ArrayType