mirror of
https://github.com/postgres/postgres.git
synced 2025-05-17 06:41:24 +03:00
342 lines
12 KiB
Plaintext
342 lines
12 KiB
Plaintext
<!-- doc/src/sgml/intarray.sgml -->
|
|
|
|
<sect1 id="intarray" xreflabel="intarray">
|
|
<title>intarray</title>
|
|
|
|
<indexterm zone="intarray">
|
|
<primary>intarray</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The <filename>intarray</> module provides a number of useful functions
|
|
and operators for manipulating null-free arrays of integers.
|
|
There is also support for indexed searches using some of the operators.
|
|
</para>
|
|
|
|
<para>
|
|
All of these operations will throw an error if a supplied array contains any
|
|
NULL elements.
|
|
</para>
|
|
|
|
<para>
|
|
Many of these operations are only sensible for one-dimensional arrays.
|
|
Although they will accept input arrays of more dimensions, the data is
|
|
treated as though it were a linear array in storage order.
|
|
</para>
|
|
|
|
<sect2>
|
|
<title><filename>intarray</> Functions and Operators</title>
|
|
|
|
<para>
|
|
The functions provided by the <filename>intarray</filename> module
|
|
are shown in <xref linkend="intarray-func-table">, the operators
|
|
in <xref linkend="intarray-op-table">.
|
|
</para>
|
|
|
|
<table id="intarray-func-table">
|
|
<title><filename>intarray</> Functions</title>
|
|
|
|
<tgroup cols="5">
|
|
<thead>
|
|
<row>
|
|
<entry>Function</entry>
|
|
<entry>Return Type</entry>
|
|
<entry>Description</entry>
|
|
<entry>Example</entry>
|
|
<entry>Result</entry>
|
|
</row>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<row>
|
|
<entry><function>icount(int[])</function><indexterm><primary>icount</primary></indexterm></entry>
|
|
<entry><type>int</type></entry>
|
|
<entry>number of elements in array</entry>
|
|
<entry><literal>icount('{1,2,3}'::int[])</literal></entry>
|
|
<entry><literal>3</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><function>sort(int[], text dir)</function><indexterm><primary>sort</primary></indexterm></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>sort array — <parameter>dir</> must be <literal>asc</> or <literal>desc</></entry>
|
|
<entry><literal>sort('{1,2,3}'::int[], 'desc')</literal></entry>
|
|
<entry><literal>{3,2,1}</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><function>sort(int[])</function></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>sort in ascending order</entry>
|
|
<entry><literal>sort(array[11,77,44])</literal></entry>
|
|
<entry><literal>{11,44,77}</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><function>sort_asc(int[])</function><indexterm><primary>sort_asc</primary></indexterm></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>sort in ascending order</entry>
|
|
<entry><literal></literal></entry>
|
|
<entry><literal></literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><function>sort_desc(int[])</function><indexterm><primary>sort_desc</primary></indexterm></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>sort in descending order</entry>
|
|
<entry><literal></literal></entry>
|
|
<entry><literal></literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><function>uniq(int[])</function><indexterm><primary>uniq</primary></indexterm></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>remove adjacent duplicates</entry>
|
|
<entry><literal>uniq(sort('{1,2,3,2,1}'::int[]))</literal></entry>
|
|
<entry><literal>{1,2,3}</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><function>idx(int[], int item)</function><indexterm><primary>idx</primary></indexterm></entry>
|
|
<entry><type>int</type></entry>
|
|
<entry>index of first element matching <parameter>item</> (0 if none)</entry>
|
|
<entry><literal>idx(array[11,22,33,22,11], 22)</literal></entry>
|
|
<entry><literal>2</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><function>subarray(int[], int start, int len)</function><indexterm><primary>subarray</primary></indexterm></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>portion of array starting at position <parameter>start</>, <parameter>len</> elements</entry>
|
|
<entry><literal>subarray('{1,2,3,2,1}'::int[], 2, 3)</literal></entry>
|
|
<entry><literal>{2,3,2}</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><function>subarray(int[], int start)</function></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>portion of array starting at position <parameter>start</></entry>
|
|
<entry><literal>subarray('{1,2,3,2,1}'::int[], 2)</literal></entry>
|
|
<entry><literal>{2,3,2,1}</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><function>intset(int)</function><indexterm><primary>intset</primary></indexterm></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>make single-element array</entry>
|
|
<entry><literal>intset(42)</literal></entry>
|
|
<entry><literal>{42}</literal></entry>
|
|
</row>
|
|
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<table id="intarray-op-table">
|
|
<title><filename>intarray</> Operators</title>
|
|
|
|
<tgroup cols="3">
|
|
<thead>
|
|
<row>
|
|
<entry>Operator</entry>
|
|
<entry>Returns</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<row>
|
|
<entry><literal>int[] && int[]</literal></entry>
|
|
<entry><type>boolean</type></entry>
|
|
<entry>overlap — <literal>true</> if arrays have at least one common element</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>int[] @> int[]</literal></entry>
|
|
<entry><type>boolean</type></entry>
|
|
<entry>contains — <literal>true</> if left array contains right array</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>int[] <@ int[]</literal></entry>
|
|
<entry><type>boolean</type></entry>
|
|
<entry>contained — <literal>true</> if left array is contained in right array</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal># int[]</literal></entry>
|
|
<entry><type>int</type></entry>
|
|
<entry>number of elements in array</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>int[] # int</literal></entry>
|
|
<entry><type>int</type></entry>
|
|
<entry>index (same as <function>idx</> function)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>int[] + int</literal></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>push element onto array (add it to end of array)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>int[] + int[] </literal></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>array concatenation (right array added to the end of left one)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>int[] - int</literal></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>remove entries matching right argument from array</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>int[] - int[]</literal></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>remove elements of right array from left</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>int[] | int</literal></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>union of arguments</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>int[] | int[]</literal></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>union of arrays</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>int[] & int[]</literal></entry>
|
|
<entry><type>int[]</type></entry>
|
|
<entry>intersection of arrays</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>int[] @@ query_int</literal></entry>
|
|
<entry><type>boolean</type></entry>
|
|
<entry><literal>true</> if array satisfies query (see below)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>query_int ~~ int[]</literal></entry>
|
|
<entry><type>boolean</type></entry>
|
|
<entry><literal>true</> if array satisfies query (commutator of <literal>@@</>)</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
(Before PostgreSQL 8.2, the containment operators <literal>@></> and
|
|
<literal><@</> were respectively called <literal>@</> and <literal>~</>.
|
|
These names are still available, but are deprecated and will eventually be
|
|
retired. Notice that the old names are reversed from the convention
|
|
formerly followed by the core geometric data types!)
|
|
</para>
|
|
|
|
<para>
|
|
The operators <literal>&&</>, <literal>@></> and
|
|
<literal><@</> are equivalent to <productname>PostgreSQL</>'s built-in
|
|
operators of the same names, except that they work only on integer arrays
|
|
that do not contain nulls, while the built-in operators work for any array
|
|
type. This restriction makes them faster than the built-in operators
|
|
in many cases.
|
|
</para>
|
|
|
|
<para>
|
|
The <literal>@@</> and <literal>~~</> operators test whether an array
|
|
satisfies a <firstterm>query</>, which is expressed as a value of a
|
|
specialized data type <type>query_int</>. A <firstterm>query</>
|
|
consists of integer values that are checked against the elements of
|
|
the array, possibly combined using the operators <literal>&</>
|
|
(AND), <literal>|</> (OR), and <literal>!</> (NOT). Parentheses
|
|
can be used as needed. For example,
|
|
the query <literal>1&(2|3)</> matches arrays that contain 1
|
|
and also contain either 2 or 3.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2>
|
|
<title>Index Support</title>
|
|
|
|
<para>
|
|
<filename>intarray</> provides index support for the
|
|
<literal>&&</>, <literal>@></>, <literal><@</>,
|
|
and <literal>@@</> operators, as well as regular array equality.
|
|
</para>
|
|
|
|
<para>
|
|
Two GiST index operator classes are provided:
|
|
<literal>gist__int_ops</> (used by default) is suitable for
|
|
small- to medium-size data sets, while
|
|
<literal>gist__intbig_ops</> uses a larger signature and is more
|
|
suitable for indexing large data sets (i.e., columns containing
|
|
a large number of distinct array values).
|
|
The implementation uses an RD-tree data structure with
|
|
built-in lossy compression.
|
|
</para>
|
|
|
|
<para>
|
|
There is also a non-default GIN operator class
|
|
<literal>gin__int_ops</> supporting the same operators.
|
|
</para>
|
|
|
|
<para>
|
|
The choice between GiST and GIN indexing depends on the relative
|
|
performance characteristics of GiST and GIN, which are discussed elsewhere.
|
|
As a rule of thumb, a GIN index is faster to search than a GiST index, but
|
|
slower to build or update; so GIN is better suited for static data and GiST
|
|
for often-updated data.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2>
|
|
<title>Example</title>
|
|
|
|
<programlisting>
|
|
-- a message can be in one or more <quote>sections</>
|
|
CREATE TABLE message (mid INT PRIMARY KEY, sections INT[], ...);
|
|
|
|
-- create specialized index
|
|
CREATE INDEX message_rdtree_idx ON message USING GIST (sections gist__int_ops);
|
|
|
|
-- select messages in section 1 OR 2 - OVERLAP operator
|
|
SELECT message.mid FROM message WHERE message.sections && '{1,2}';
|
|
|
|
-- select messages in sections 1 AND 2 - CONTAINS operator
|
|
SELECT message.mid FROM message WHERE message.sections @> '{1,2}';
|
|
|
|
-- the same, using QUERY operator
|
|
SELECT message.mid FROM message WHERE message.sections @@ '1&2'::query_int;
|
|
</programlisting>
|
|
</sect2>
|
|
|
|
<sect2>
|
|
<title>Benchmark</title>
|
|
|
|
<para>
|
|
The source directory <filename>contrib/intarray/bench</> contains a
|
|
benchmark test suite. To run:
|
|
</para>
|
|
|
|
<programlisting>
|
|
cd .../bench
|
|
createdb TEST
|
|
psql TEST < ../_int.sql
|
|
./create_test.pl | psql TEST
|
|
./bench.pl
|
|
</programlisting>
|
|
|
|
<para>
|
|
The <filename>bench.pl</> script has numerous options, which
|
|
are displayed when it is run without any arguments.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2>
|
|
<title>Authors</title>
|
|
|
|
<para>
|
|
All work was done by Teodor Sigaev (<email>teodor@sigaev.ru</email>) and
|
|
Oleg Bartunov (<email>oleg@sai.msu.su</email>). See
|
|
<ulink url="http://www.sai.msu.su/~megera/postgres/gist/"></ulink> for
|
|
additional information. Andrey Oktyabrski did a great work on adding new
|
|
functions and operations.
|
|
</para>
|
|
</sect2>
|
|
|
|
</sect1>
|