mirror of
https://github.com/postgres/postgres.git
synced 2025-05-20 05:13:53 +03:00
Alexander Lakhin Discussion: https://postgr.es/m/ce7debdd-c943-d7a7-9b41-687107b27831@gmail.com
506 lines
16 KiB
Plaintext
506 lines
16 KiB
Plaintext
<!-- doc/src/sgml/intarray.sgml -->
|
|
|
|
<sect1 id="intarray" xreflabel="intarray">
|
|
<title>intarray</title>
|
|
|
|
<indexterm zone="intarray">
|
|
<primary>intarray</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The <filename>intarray</filename> module provides a number of useful functions
|
|
and operators for manipulating null-free arrays of integers.
|
|
There is also support for indexed searches using some of the operators.
|
|
</para>
|
|
|
|
<para>
|
|
All of these operations will throw an error if a supplied array contains any
|
|
NULL elements.
|
|
</para>
|
|
|
|
<para>
|
|
Many of these operations are only sensible for one-dimensional arrays.
|
|
Although they will accept input arrays of more dimensions, the data is
|
|
treated as though it were a linear array in storage order.
|
|
</para>
|
|
|
|
<para>
|
|
This module is considered <quote>trusted</quote>, that is, it can be
|
|
installed by non-superusers who have <literal>CREATE</literal> privilege
|
|
on the current database.
|
|
</para>
|
|
|
|
<sect2>
|
|
<title><filename>intarray</filename> Functions and Operators</title>
|
|
|
|
<para>
|
|
The functions provided by the <filename>intarray</filename> module
|
|
are shown in <xref linkend="intarray-func-table"/>, the operators
|
|
in <xref linkend="intarray-op-table"/>.
|
|
</para>
|
|
|
|
<table id="intarray-func-table">
|
|
<title><filename>intarray</filename> Functions</title>
|
|
<tgroup cols="1">
|
|
<thead>
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
Function
|
|
</para>
|
|
<para>
|
|
Description
|
|
</para>
|
|
<para>
|
|
Example(s)
|
|
</para></entry>
|
|
</row>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<indexterm><primary>icount</primary></indexterm>
|
|
<function>icount</function> ( <type>integer[]</type> )
|
|
<returnvalue>integer</returnvalue>
|
|
</para>
|
|
<para>
|
|
Returns the number of elements in the array.
|
|
</para>
|
|
<para>
|
|
<literal>icount('{1,2,3}'::integer[])</literal>
|
|
<returnvalue>3</returnvalue>
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<indexterm><primary>sort</primary></indexterm>
|
|
<function>sort</function> ( <type>integer[]</type>, <parameter>dir</parameter> <type>text</type> )
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Sorts the array in either ascending or descending order.
|
|
<parameter>dir</parameter> must be <literal>asc</literal>
|
|
or <literal>desc</literal>.
|
|
</para>
|
|
<para>
|
|
<literal>sort('{1,3,2}'::integer[], 'desc')</literal>
|
|
<returnvalue>{3,2,1}</returnvalue>
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<function>sort</function> ( <type>integer[]</type> )
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para role="func_signature">
|
|
<indexterm><primary>sort_asc</primary></indexterm>
|
|
<function>sort_asc</function> ( <type>integer[]</type> )
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Sorts in ascending order.
|
|
</para>
|
|
<para>
|
|
<literal>sort(array[11,77,44])</literal>
|
|
<returnvalue>{11,44,77}</returnvalue>
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<indexterm><primary>sort_desc</primary></indexterm>
|
|
<function>sort_desc</function> ( <type>integer[]</type> )
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Sorts in descending order.
|
|
</para>
|
|
<para>
|
|
<literal>sort_desc(array[11,77,44])</literal>
|
|
<returnvalue>{77,44,11}</returnvalue>
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<indexterm><primary>uniq</primary></indexterm>
|
|
<function>uniq</function> ( <type>integer[]</type> )
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Removes adjacent duplicates.
|
|
</para>
|
|
<para>
|
|
<literal>uniq(sort('{1,2,3,2,1}'::integer[]))</literal>
|
|
<returnvalue>{1,2,3}</returnvalue>
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<indexterm><primary>idx</primary></indexterm>
|
|
<function>idx</function> ( <type>integer[]</type>, <parameter>item</parameter> <type>integer</type> )
|
|
<returnvalue>integer</returnvalue>
|
|
</para>
|
|
<para>
|
|
Returns index of the first array element
|
|
matching <parameter>item</parameter>, or 0 if no match.
|
|
</para>
|
|
<para>
|
|
<literal>idx(array[11,22,33,22,11], 22)</literal>
|
|
<returnvalue>2</returnvalue>
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<indexterm><primary>subarray</primary></indexterm>
|
|
<function>subarray</function> ( <type>integer[]</type>, <parameter>start</parameter> <type>integer</type>, <parameter>len</parameter> <type>integer</type> )
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Extracts the portion of the array starting at
|
|
position <parameter>start</parameter>, with <parameter>len</parameter>
|
|
elements.
|
|
</para>
|
|
<para>
|
|
<literal>subarray('{1,2,3,2,1}'::integer[], 2, 3)</literal>
|
|
<returnvalue>{2,3,2}</returnvalue>
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<function>subarray</function> ( <type>integer[]</type>, <parameter>start</parameter> <type>integer</type> )
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Extracts the portion of the array starting at
|
|
position <parameter>start</parameter>.
|
|
</para>
|
|
<para>
|
|
<literal>subarray('{1,2,3,2,1}'::integer[], 2)</literal>
|
|
<returnvalue>{2,3,2,1}</returnvalue>
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<indexterm><primary>intset</primary></indexterm>
|
|
<function>intset</function> ( <type>integer</type> )
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Makes a single-element array.
|
|
</para>
|
|
<para>
|
|
<literal>intset(42)</literal>
|
|
<returnvalue>{42}</returnvalue>
|
|
</para></entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<table id="intarray-op-table">
|
|
<title><filename>intarray</filename> Operators</title>
|
|
<tgroup cols="1">
|
|
<thead>
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
Operator
|
|
</para>
|
|
<para>
|
|
Description
|
|
</para></entry>
|
|
</row>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type>integer[]</type> <literal>&&</literal> <type>integer[]</type>
|
|
<returnvalue>boolean</returnvalue>
|
|
</para>
|
|
<para>
|
|
Do arrays overlap (have at least one element in common)?
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type>integer[]</type> <literal>@></literal> <type>integer[]</type>
|
|
<returnvalue>boolean</returnvalue>
|
|
</para>
|
|
<para>
|
|
Does left array contain right array?
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type>integer[]</type> <literal><@</literal> <type>integer[]</type>
|
|
<returnvalue>boolean</returnvalue>
|
|
</para>
|
|
<para>
|
|
Is left array contained in right array?
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type></type> <literal>#</literal> <type>integer[]</type>
|
|
<returnvalue>integer</returnvalue>
|
|
</para>
|
|
<para>
|
|
Returns the number of elements in the array.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type>integer[]</type> <literal>#</literal> <type>integer</type>
|
|
<returnvalue>integer</returnvalue>
|
|
</para>
|
|
<para>
|
|
Returns index of the first array element
|
|
matching the right argument, or 0 if no match.
|
|
(Same as <function>idx</function> function.)
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type>integer[]</type> <literal>+</literal> <type>integer</type>
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Adds element to end of array.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type>integer[]</type> <literal>+</literal> <type>integer[]</type>
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Concatenates the arrays.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type>integer[]</type> <literal>-</literal> <type>integer</type>
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Removes entries matching the right argument from the array.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type>integer[]</type> <literal>-</literal> <type>integer[]</type>
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Removes elements of the right array from the left array.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type>integer[]</type> <literal>|</literal> <type>integer</type>
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Computes the union of the arguments.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type>integer[]</type> <literal>|</literal> <type>integer[]</type>
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Computes the union of the arguments.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type>integer[]</type> <literal>&</literal> <type>integer[]</type>
|
|
<returnvalue>integer[]</returnvalue>
|
|
</para>
|
|
<para>
|
|
Computes the intersection of the arguments.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type>integer[]</type> <literal>@@</literal> <type>query_int</type>
|
|
<returnvalue>boolean</returnvalue>
|
|
</para>
|
|
<para>
|
|
Does array satisfy query? (see below)
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<type>query_int</type> <literal>~~</literal> <type>integer[]</type>
|
|
<returnvalue>boolean</returnvalue>
|
|
</para>
|
|
<para>
|
|
Does array satisfy query? (commutator of <literal>@@</literal>)
|
|
</para></entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
(Before PostgreSQL 8.2, the containment operators <literal>@></literal> and
|
|
<literal><@</literal> were respectively called <literal>@</literal> and <literal>~</literal>.
|
|
These names are still available, but are deprecated and will eventually be
|
|
retired. Notice that the old names are reversed from the convention
|
|
formerly followed by the core geometric data types!)
|
|
</para>
|
|
|
|
<para>
|
|
The operators <literal>&&</literal>, <literal>@></literal> and
|
|
<literal><@</literal> are equivalent to <productname>PostgreSQL</productname>'s built-in
|
|
operators of the same names, except that they work only on integer arrays
|
|
that do not contain nulls, while the built-in operators work for any array
|
|
type. This restriction makes them faster than the built-in operators
|
|
in many cases.
|
|
</para>
|
|
|
|
<para>
|
|
The <literal>@@</literal> and <literal>~~</literal> operators test whether an array
|
|
satisfies a <firstterm>query</firstterm>, which is expressed as a value of a
|
|
specialized data type <type>query_int</type>. A <firstterm>query</firstterm>
|
|
consists of integer values that are checked against the elements of
|
|
the array, possibly combined using the operators <literal>&</literal>
|
|
(AND), <literal>|</literal> (OR), and <literal>!</literal> (NOT). Parentheses
|
|
can be used as needed. For example,
|
|
the query <literal>1&(2|3)</literal> matches arrays that contain 1
|
|
and also contain either 2 or 3.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2>
|
|
<title>Index Support</title>
|
|
|
|
<para>
|
|
<filename>intarray</filename> provides index support for the
|
|
<literal>&&</literal>, <literal>@></literal>, <literal><@</literal>,
|
|
and <literal>@@</literal> operators, as well as regular array equality.
|
|
</para>
|
|
|
|
<para>
|
|
Two parameterized GiST index operator classes are provided:
|
|
<literal>gist__int_ops</literal> (used by default) is suitable for
|
|
small- to medium-size data sets, while
|
|
<literal>gist__intbig_ops</literal> uses a larger signature and is more
|
|
suitable for indexing large data sets (i.e., columns containing
|
|
a large number of distinct array values).
|
|
The implementation uses an RD-tree data structure with
|
|
built-in lossy compression.
|
|
</para>
|
|
|
|
<para>
|
|
<literal>gist__int_ops</literal> approximates an integer set as an array of
|
|
integer ranges. Its optional integer parameter <literal>numranges</literal>
|
|
determines the maximum number of ranges in
|
|
one index key. The default value of <literal>numranges</literal> is 100.
|
|
Valid values are between 1 and 253. Using larger arrays as GiST index
|
|
keys leads to a more precise search (scanning a smaller fraction of the index and
|
|
fewer heap pages), at the cost of a larger index.
|
|
</para>
|
|
|
|
<para>
|
|
<literal>gist__intbig_ops</literal> approximates an integer set as a bitmap
|
|
signature. Its optional integer parameter <literal>siglen</literal>
|
|
determines the signature length in bytes.
|
|
The default signature length is 16 bytes. Valid values of signature length
|
|
are between 1 and 2024 bytes. Longer signatures lead to a more precise
|
|
search (scanning a smaller fraction of the index and fewer heap pages), at
|
|
the cost of a larger index.
|
|
</para>
|
|
|
|
<para>
|
|
There is also a non-default GIN operator class
|
|
<literal>gin__int_ops</literal> supporting the same operators.
|
|
</para>
|
|
|
|
<para>
|
|
The choice between GiST and GIN indexing depends on the relative
|
|
performance characteristics of GiST and GIN, which are discussed elsewhere.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2>
|
|
<title>Example</title>
|
|
|
|
<programlisting>
|
|
-- a message can be in one or more <quote>sections</quote>
|
|
CREATE TABLE message (mid INT PRIMARY KEY, sections INT[], ...);
|
|
|
|
-- create specialized index with signature length of 32 bytes
|
|
CREATE INDEX message_rdtree_idx ON message USING GIST (sections gist__int_ops(siglen=32));
|
|
|
|
-- select messages in section 1 OR 2 - OVERLAP operator
|
|
SELECT message.mid FROM message WHERE message.sections && '{1,2}';
|
|
|
|
-- select messages in sections 1 AND 2 - CONTAINS operator
|
|
SELECT message.mid FROM message WHERE message.sections @> '{1,2}';
|
|
|
|
-- the same, using QUERY operator
|
|
SELECT message.mid FROM message WHERE message.sections @@ '1&2'::query_int;
|
|
</programlisting>
|
|
</sect2>
|
|
|
|
<sect2>
|
|
<title>Benchmark</title>
|
|
|
|
<para>
|
|
The source directory <filename>contrib/intarray/bench</filename> contains a
|
|
benchmark test suite, which can be run against an installed
|
|
<productname>PostgreSQL</productname> server. (It also requires <filename>DBD::Pg</filename>
|
|
to be installed.) To run:
|
|
</para>
|
|
|
|
<programlisting>
|
|
cd .../contrib/intarray/bench
|
|
createdb TEST
|
|
psql -c "CREATE EXTENSION intarray" TEST
|
|
./create_test.pl | psql TEST
|
|
./bench.pl
|
|
</programlisting>
|
|
|
|
<para>
|
|
The <filename>bench.pl</filename> script has numerous options, which
|
|
are displayed when it is run without any arguments.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2>
|
|
<title>Authors</title>
|
|
|
|
<para>
|
|
All work was done by Teodor Sigaev (<email>teodor@sigaev.ru</email>) and
|
|
Oleg Bartunov (<email>oleg@sai.msu.su</email>). See
|
|
<ulink url="http://www.sai.msu.su/~megera/postgres/gist/"></ulink> for
|
|
additional information. Andrey Oktyabrski did a great work on adding new
|
|
functions and operations.
|
|
</para>
|
|
</sect2>
|
|
|
|
</sect1>
|