mirror of
https://github.com/postgres/postgres.git
synced 2025-07-26 01:22:12 +03:00
Make an editorial pass over the newly SGML-ified contrib documentation.
Fix lots of bad markup, bad English, bad explanations. Second round of commits. pgcrypto and pgstandby still to go...
This commit is contained in:
@ -1,196 +1,207 @@
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/intarray.sgml,v 1.5 2007/12/10 05:32:51 tgl Exp $ -->
|
||||
|
||||
<sect1 id="intarray">
|
||||
<title>intarray</title>
|
||||
|
||||
|
||||
<indexterm zone="intarray">
|
||||
<primary>intarray</primary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
This is an implementation of RD-tree data structure using GiST interface
|
||||
of PostgreSQL. It has built-in lossy compression.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Current implementation provides index support for one-dimensional array of
|
||||
integers: gist__int_ops, suitable for small and medium size of arrays (used by
|
||||
default), and gist__intbig_ops for indexing large arrays (we use superimposed
|
||||
signature with length of 4096 bits to represent sets). There is also a
|
||||
non-default gin__int_ops for GIN indexes on integer arrays.
|
||||
The <filename>intarray</> module provides a number of useful functions
|
||||
and operators for manipulating one-dimensional arrays of integers.
|
||||
There is also support for indexed searches using some of the operators.
|
||||
</para>
|
||||
|
||||
<sect2>
|
||||
<title>Functions</title>
|
||||
<title><filename>intarray</> Functions and Operators</title>
|
||||
|
||||
<itemizedlist>
|
||||
<table id="intarray-func-table">
|
||||
<title><filename>intarray</> Functions</title>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>int icount(int[])</literal> - the number of elements in intarray
|
||||
</para>
|
||||
<programlisting>
|
||||
test=# select icount('{1,2,3}'::int[]);
|
||||
icount
|
||||
--------
|
||||
3
|
||||
(1 row)
|
||||
</programlisting>
|
||||
</listitem>
|
||||
<tgroup cols="5">
|
||||
<thead>
|
||||
<row>
|
||||
<entry>Function</entry>
|
||||
<entry>Return Type</entry>
|
||||
<entry>Description</entry>
|
||||
<entry>Example</entry>
|
||||
<entry>Result</entry>
|
||||
</row>
|
||||
</thead>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>int[] sort(int[], 'asc' | 'desc')</literal> - sort intarray
|
||||
</para>
|
||||
<programlisting>
|
||||
test=# select sort('{1,2,3}'::int[],'desc');
|
||||
sort
|
||||
---------
|
||||
{3,2,1}
|
||||
(1 row)
|
||||
</programlisting>
|
||||
</listitem>
|
||||
<tbody>
|
||||
<row>
|
||||
<entry><function>icount(int[])</function></entry>
|
||||
<entry><type>int</type></entry>
|
||||
<entry>number of elements in array</entry>
|
||||
<entry><literal>icount('{1,2,3}'::int[])</literal></entry>
|
||||
<entry><literal>3</literal></entry>
|
||||
</row>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>int[] sort(int[])</literal> - sort in ascending order
|
||||
</para>
|
||||
</listitem>
|
||||
<row>
|
||||
<entry><function>sort(int[], text dir)</function></entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>sort array — <parameter>dir</> must be <literal>asc</> or <literal>desc</></entry>
|
||||
<entry><literal>sort('{1,2,3}'::int[], 'desc')</literal></entry>
|
||||
<entry><literal>{3,2,1}</literal></entry>
|
||||
</row>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>int[] sort_asc(int[]),sort_desc(int[])</literal> - shortcuts for sort
|
||||
</para>
|
||||
</listitem>
|
||||
<row>
|
||||
<entry><function>sort(int[])</function></entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>sort in ascending order</entry>
|
||||
<entry><literal>sort(array[11,77,44])</literal></entry>
|
||||
<entry><literal>{11,44,77}</literal></entry>
|
||||
</row>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>int[] uniq(int[])</literal> - returns unique elements
|
||||
</para>
|
||||
<programlisting>
|
||||
test=# select uniq(sort('{1,2,3,2,1}'::int[]));
|
||||
uniq
|
||||
---------
|
||||
{1,2,3}
|
||||
(1 row)
|
||||
</programlisting>
|
||||
</listitem>
|
||||
<row>
|
||||
<entry><function>sort_asc(int[])</function></entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>sort in ascending order</entry>
|
||||
<entry><literal></literal></entry>
|
||||
<entry><literal></literal></entry>
|
||||
</row>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>int idx(int[], int item)</literal> - returns index of first
|
||||
intarray matching element to item, or '0' if matching failed.
|
||||
</para>
|
||||
<programlisting>
|
||||
test=# select idx('{1,2,3,2,1}'::int[],2);
|
||||
idx
|
||||
-----
|
||||
2
|
||||
(1 row)
|
||||
</programlisting>
|
||||
</listitem>
|
||||
<row>
|
||||
<entry><function>sort_desc(int[])</function></entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>sort in descending order</entry>
|
||||
<entry><literal></literal></entry>
|
||||
<entry><literal></literal></entry>
|
||||
</row>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>int[] subarray(int[],int START [, int LEN])</literal> - returns
|
||||
part of intarray starting from element number START (from 1) and length LEN.
|
||||
</para>
|
||||
<programlisting>
|
||||
test=# select subarray('{1,2,3,2,1}'::int[],2,3);
|
||||
subarray
|
||||
----------
|
||||
{2,3,2}
|
||||
(1 row)
|
||||
</programlisting>
|
||||
</listitem>
|
||||
<row>
|
||||
<entry><function>uniq(int[])</function></entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>remove adjacent duplicates</entry>
|
||||
<entry><literal>uniq(sort('{1,2,3,2,1}'::int[]))</literal></entry>
|
||||
<entry><literal>{1,2,3}</literal></entry>
|
||||
</row>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>int[] intset(int4)</literal> - casting int4 to int[]
|
||||
</para>
|
||||
<programlisting>
|
||||
test=# select intset(1);
|
||||
intset
|
||||
--------
|
||||
{1}
|
||||
(1 row)
|
||||
</programlisting>
|
||||
</listitem>
|
||||
<row>
|
||||
<entry><function>idx(int[], int item)</function></entry>
|
||||
<entry><type>int</type></entry>
|
||||
<entry>index of first element matching <parameter>item</> (0 if none)</entry>
|
||||
<entry><literal>idx(array[11,22,33,22,11], 22)</literal></entry>
|
||||
<entry><literal>2</literal></entry>
|
||||
</row>
|
||||
|
||||
</itemizedlist>
|
||||
</sect2>
|
||||
<row>
|
||||
<entry><function>subarray(int[], int start, int len)</function></entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>portion of array starting at position <parameter>start</>, len <parameter>elements</></entry>
|
||||
<entry><literal>subarray('{1,2,3,2,1}'::int[], 2, 3)</literal></entry>
|
||||
<entry><literal>{2,3,2}</literal></entry>
|
||||
</row>
|
||||
|
||||
<sect2>
|
||||
<title>Operations</title>
|
||||
<table>
|
||||
<title>Operations</title>
|
||||
<tgroup cols="2">
|
||||
<row>
|
||||
<entry><function>subarray(int[], int start)</function></entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>portion of array starting at position <parameter>start</></entry>
|
||||
<entry><literal>subarray('{1,2,3,2,1}'::int[], 2)</literal></entry>
|
||||
<entry><literal>{2,3,2,1}</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><function>intset(int)</function></entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>make single-element array</entry>
|
||||
<entry><literal>intset(42)</literal></entry>
|
||||
<entry><literal>{42}</literal></entry>
|
||||
</row>
|
||||
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
<table id="intarray-op-table">
|
||||
<title><filename>intarray</> Operators</title>
|
||||
|
||||
<tgroup cols="3">
|
||||
<thead>
|
||||
<row>
|
||||
<entry>Operator</entry>
|
||||
<entry>Returns</entry>
|
||||
<entry>Description</entry>
|
||||
</row>
|
||||
</thead>
|
||||
|
||||
<tbody>
|
||||
<row>
|
||||
<entry><literal>int[] && int[]</literal></entry>
|
||||
<entry>overlap - returns TRUE if arrays have at least one common element</entry>
|
||||
<entry><type>boolean</type></entry>
|
||||
<entry>overlap — <literal>true</> if arrays have at least one common element</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>int[] @> int[]</literal></entry>
|
||||
<entry>contains - returns TRUE if left array contains right array</entry>
|
||||
<entry><type>boolean</type></entry>
|
||||
<entry>contains — <literal>true</> if left array contains right array</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>int[] <@ int[]</literal></entry>
|
||||
<entry>contained - returns TRUE if left array is contained in right array</entry>
|
||||
<entry><type>boolean</type></entry>
|
||||
<entry>contained — <literal>true</> if left array is contained in right array</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal># int[]</literal></entry>
|
||||
<entry>returns the number of elements in array</entry>
|
||||
<entry><type>int</type></entry>
|
||||
<entry>number of elements in array</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>int[] # int</literal></entry>
|
||||
<entry><type>int</type></entry>
|
||||
<entry>index (same as <function>idx</> function)</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>int[] + int</literal></entry>
|
||||
<entry>push element to array ( add to end of array)</entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>push element onto array (add it to end of array)</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>int[] + int[] </literal></entry>
|
||||
<entry>merge of arrays (right array added to the end of left one)</entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>array concatenation (right array added to the end of left one)</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>int[] - int</literal></entry>
|
||||
<entry>remove entries matched by right argument from array</entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>remove entries matching right argument from array</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>int[] - int[]</literal></entry>
|
||||
<entry>remove right array from left</entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>remove elements of right array from left</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>int[] | int</literal></entry>
|
||||
<entry>returns intarray - union of arguments</entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>union of arguments</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>int[] | int[]</literal></entry>
|
||||
<entry>returns intarray as a union of two arrays</entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>union of arrays</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><literal>int[] & int[]</literal></entry>
|
||||
<entry>returns intersection of arrays</entry>
|
||||
<entry><type>int[]</type></entry>
|
||||
<entry>intersection of arrays</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><literal>int[] @@ query_int</literal></entry>
|
||||
<entry>
|
||||
returns TRUE if array satisfies query (like
|
||||
<literal>'1&(2|3)'</literal>)
|
||||
</entry>
|
||||
<entry><type>boolean</type></entry>
|
||||
<entry><literal>true</> if array satisfies query (see below)</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><literal>query_int ~~ int[]</literal></entry>
|
||||
<entry>returns TRUE if array satisfies query (commutator of @@)</entry>
|
||||
<entry><type>boolean</type></entry>
|
||||
<entry><literal>true</> if array satisfies query (commutator of <literal>@@</>)</entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
<para>
|
||||
(Before PostgreSQL 8.2, the containment operators @> and <@ were
|
||||
respectively called @ and ~. These names are still available, but are
|
||||
@ -198,85 +209,102 @@ test=# select intset(1);
|
||||
are reversed from the convention formerly followed by the core geometric
|
||||
datatypes!)
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The <literal>@@</> and <literal>~~</> operators test whether an array
|
||||
satisfies a <firstterm>query</>, which is expressed as a value of a
|
||||
specialized data type <type>query_int</>. A <firstterm>query</>
|
||||
consists of integer values that are checked against the elements of
|
||||
the array, possibly combined using the operators <literal>&</>
|
||||
(AND), <literal>|</> (OR), and <literal>!</> (NOT). Parentheses
|
||||
can be used as needed. For example,
|
||||
the query <literal>1&(2|3)</> matches arrays that contain 1
|
||||
and also contain either 2 or 3.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2>
|
||||
<title>Index Support</title>
|
||||
|
||||
<para>
|
||||
<filename>intarray</> provides index support for the
|
||||
<literal>&&</>, <literal>@></>, <literal><@</>,
|
||||
and <literal>@@</> operators, as well as regular array equality.
|
||||
The implementation uses an RD-tree data structure with
|
||||
built-in lossy compression.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Two GiST index operator classes are provided:
|
||||
<literal>gist__int_ops</> (used by default) is suitable for
|
||||
small and medium-size arrays, while
|
||||
<literal>gist__intbig_ops</> uses a larger signature and is more
|
||||
suitable for indexing large arrays.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
There is also a non-default GIN operator class
|
||||
<literal>gin__int_ops</>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The choice between GiST and GIN indexing depends on the relative
|
||||
performance characteristics of GiST and GIN, which are discussed elsewhere.
|
||||
As a rule of thumb, a GIN index is faster to search than a GiST index, but
|
||||
slower to build or update; so GIN is better suited for static data and GiST
|
||||
for often-updated data.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2>
|
||||
<title>Example</title>
|
||||
|
||||
<programlisting>
|
||||
CREATE TABLE message (mid INT NOT NULL,sections INT[]);
|
||||
CREATE TABLE message_section_map (mid INT NOT NULL,sid INT NOT NULL);
|
||||
-- a message can be in one or more <quote>sections</>
|
||||
CREATE TABLE message (mid INT PRIMARY KEY, sections INT[], ...);
|
||||
|
||||
-- create indices
|
||||
CREATE unique index message_key ON message ( mid );
|
||||
CREATE unique index message_section_map_key2 ON message_section_map (sid, mid );
|
||||
CREATE INDEX message_rdtree_idx ON message USING GIST ( sections gist__int_ops);
|
||||
-- create specialized index
|
||||
CREATE INDEX message_rdtree_idx ON message USING GIST (sections gist__int_ops);
|
||||
|
||||
-- select some messages with section in 1 OR 2 - OVERLAP operator
|
||||
SELECT message.mid FROM message WHERE message.sections && '{1,2}';
|
||||
-- select messages in section 1 OR 2 - OVERLAP operator
|
||||
SELECT message.mid FROM message WHERE message.sections && '{1,2}';
|
||||
|
||||
-- select messages contains in sections 1 AND 2 - CONTAINS operator
|
||||
-- select messages in sections 1 AND 2 - CONTAINS operator
|
||||
SELECT message.mid FROM message WHERE message.sections @> '{1,2}';
|
||||
-- the same, CONTAINED operator
|
||||
SELECT message.mid FROM message WHERE '{1,2}' <@ message.sections;
|
||||
|
||||
-- the same, using QUERY operator
|
||||
SELECT message.mid FROM message WHERE message.sections @@ '1&2'::query_int;
|
||||
</programlisting>
|
||||
</sect2>
|
||||
|
||||
<sect2>
|
||||
<title>Benchmark</title>
|
||||
|
||||
<para>
|
||||
subdirectory bench contains benchmark suite.
|
||||
The source directory <filename>contrib/intarray/bench</> contains a
|
||||
benchmark test suite. To run:
|
||||
</para>
|
||||
|
||||
<programlisting>
|
||||
cd ./bench
|
||||
1. createdb TEST
|
||||
2. psql TEST < ../_int.sql
|
||||
3. ./create_test.pl | psql TEST
|
||||
4. ./bench.pl - perl script to benchmark queries, supports OR, AND queries
|
||||
with/without RD-Tree. Run script without arguments to
|
||||
see availbale options.
|
||||
|
||||
a)test without RD-Tree (OR)
|
||||
./bench.pl -d TEST -c -s 1,2 -v
|
||||
b)test with RD-Tree
|
||||
./bench.pl -d TEST -c -s 1,2 -v -r
|
||||
|
||||
BENCHMARKS:
|
||||
|
||||
Size of table <message>: 200000
|
||||
Size of table <message_section_map>: 269133
|
||||
|
||||
Distribution of messages by sections:
|
||||
|
||||
section 0: 74377 messages
|
||||
section 1: 16284 messages
|
||||
section 50: 1229 messages
|
||||
section 99: 683 messages
|
||||
|
||||
old - without RD-Tree support,
|
||||
new - with RD-Tree
|
||||
|
||||
+----------+---------------+----------------+
|
||||
|Search set|OR, time in sec|AND, time in sec|
|
||||
| +-------+-------+--------+-------+
|
||||
| | old | new | old | new |
|
||||
+----------+-------+-------+--------+-------+
|
||||
| 1| 0.625| 0.101| -| -|
|
||||
+----------+-------+-------+--------+-------+
|
||||
| 99| 0.018| 0.017| -| -|
|
||||
+----------+-------+-------+--------+-------+
|
||||
| 1,2| 0.766| 0.133| 0.628| 0.045|
|
||||
+----------+-------+-------+--------+-------+
|
||||
| 1,2,50,65| 0.794| 0.141| 0.030| 0.006|
|
||||
+----------+-------+-------+--------+-------+
|
||||
cd .../bench
|
||||
createdb TEST
|
||||
psql TEST < ../_int.sql
|
||||
./create_test.pl | psql TEST
|
||||
./bench.pl
|
||||
</programlisting>
|
||||
|
||||
<para>
|
||||
The <filename>bench.pl</> script has numerous options, which
|
||||
are displayed when it is run without any arguments.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2>
|
||||
<title>Authors</title>
|
||||
|
||||
<para>
|
||||
All work was done by Teodor Sigaev (<email>teodor@stack.net</email>) and Oleg
|
||||
Bartunov (<email>oleg@sai.msu.su</email>). See
|
||||
All work was done by Teodor Sigaev (<email>teodor@stack.net</email>) and
|
||||
Oleg Bartunov (<email>oleg@sai.msu.su</email>). See
|
||||
<ulink url="http://www.sai.msu.su/~megera/postgres/gist"></ulink> for
|
||||
additional information. Andrey Oktyabrski did a great work on adding new
|
||||
functions and operations.
|
||||
@ -284,4 +312,3 @@ SELECT message.mid FROM message WHERE '{1,2}' <@ message.sections;
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
|
Reference in New Issue
Block a user