mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Add the ability to create indexes 'concurrently', that is, without
blocking concurrent writes to the table. Greg Stark, with a little help from Tom Lane.
This commit is contained in:
@ -1,4 +1,4 @@
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.129 2006/07/31 20:08:55 tgl Exp $ -->
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.130 2006/08/25 04:06:44 tgl Exp $ -->
|
||||
<!--
|
||||
Documentation of the system catalogs, directed toward PostgreSQL developers
|
||||
-->
|
||||
@ -2420,6 +2420,17 @@
|
||||
<entry>If true, the table was last clustered on this index</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><structfield>indisvalid</structfield></entry>
|
||||
<entry><type>bool</type></entry>
|
||||
<entry></entry>
|
||||
<entry>If true, the index is currently valid for queries.
|
||||
False means the index is possibly incomplete: it must still be
|
||||
inserted into by INSERT/UPDATE operations, but it cannot safely be
|
||||
used for queries, and if it is unique, the uniqueness shouldn't be
|
||||
relied on either.</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><structfield>indkey</structfield></entry>
|
||||
<entry><type>int2vector</type></entry>
|
||||
|
@ -1,4 +1,4 @@
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.16 2006/07/31 20:08:59 tgl Exp $ -->
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.17 2006/08/25 04:06:44 tgl Exp $ -->
|
||||
|
||||
<chapter id="indexam">
|
||||
<title>Index Access Method Interface Definition</title>
|
||||
@ -648,6 +648,16 @@ amrestrpos (IndexScanDesc scan);
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Furthermore, immediately before raising a uniqueness violation
|
||||
according to the above rules, the access method must recheck the
|
||||
liveness of the row being inserted. If it is committed dead then
|
||||
no error should be raised. (This case cannot occur during the
|
||||
ordinary scenario of inserting a row that's just been created by
|
||||
the current transaction. It can happen during
|
||||
<command>CREATE UNIQUE INDEX CONCURRENTLY</>, however.)
|
||||
</para>
|
||||
|
||||
<para>
|
||||
We require the index access method to apply these tests itself, which
|
||||
means that it must reach into the heap to check the commit status of
|
||||
|
@ -1,4 +1,4 @@
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/indices.sgml,v 1.57 2006/05/24 11:01:39 teodor Exp $ -->
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/indices.sgml,v 1.58 2006/08/25 04:06:44 tgl Exp $ -->
|
||||
|
||||
<chapter id="indexes">
|
||||
<title id="indexes-title">Indexes</title>
|
||||
@ -90,6 +90,17 @@ CREATE INDEX test1_id_index ON test1 (id);
|
||||
significantly speed up queries with joins.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Creating an index on a large table can take a long time. By default,
|
||||
<productname>PostgreSQL</productname> allows reads (selects) to occur
|
||||
on the table in parallel with index creation, but writes (inserts,
|
||||
updates, deletes) are blocked until the index build is finished.
|
||||
It is possible to allow writes to occur in parallel with index
|
||||
creation, but there are several caveats to be aware of —
|
||||
for more information see <xref linkend="SQL-CREATEINDEX-CONCURRENTLY"
|
||||
endterm="SQL-CREATEINDEX-CONCURRENTLY-title">.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
After an index is created, the system has to keep it synchronized with the
|
||||
table. This adds overhead to data manipulation operations.
|
||||
|
@ -1,4 +1,4 @@
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/mvcc.sgml,v 2.56 2006/04/23 03:39:52 momjian Exp $ -->
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/mvcc.sgml,v 2.57 2006/08/25 04:06:45 tgl Exp $ -->
|
||||
|
||||
<chapter id="mvcc">
|
||||
<title>Concurrency Control</title>
|
||||
@ -622,7 +622,8 @@ SELECT SUM(value) FROM mytab WHERE class = 2;
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Acquired by <command>VACUUM</command> (without <option>FULL</option>).
|
||||
Acquired by <command>VACUUM</command> (without <option>FULL</option>)
|
||||
and by <command>CREATE INDEX CONCURRENTLY</>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -641,7 +642,8 @@ SELECT SUM(value) FROM mytab WHERE class = 2;
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Acquired by <command>CREATE INDEX</command>.
|
||||
Acquired by <command>CREATE INDEX</command>
|
||||
(without <option>CONCURRENTLY</option>).
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
@ -1,5 +1,5 @@
|
||||
<!--
|
||||
$PostgreSQL: pgsql/doc/src/sgml/ref/create_index.sgml,v 1.55 2006/07/11 21:05:57 tgl Exp $
|
||||
$PostgreSQL: pgsql/doc/src/sgml/ref/create_index.sgml,v 1.56 2006/08/25 04:06:45 tgl Exp $
|
||||
PostgreSQL documentation
|
||||
-->
|
||||
|
||||
@ -20,7 +20,7 @@ PostgreSQL documentation
|
||||
|
||||
<refsynopsisdiv>
|
||||
<synopsis>
|
||||
CREATE [ UNIQUE ] INDEX <replaceable class="parameter">name</replaceable> ON <replaceable class="parameter">table</replaceable> [ USING <replaceable class="parameter">method</replaceable> ]
|
||||
CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] <replaceable class="parameter">name</replaceable> ON <replaceable class="parameter">table</replaceable> [ USING <replaceable class="parameter">method</replaceable> ]
|
||||
( { <replaceable class="parameter">column</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ <replaceable class="parameter">opclass</replaceable> ] [, ...] )
|
||||
[ WITH ( <replaceable class="PARAMETER">storage_parameter</replaceable> = <replaceable class="PARAMETER">value</replaceable> [, ... ] ) ]
|
||||
[ TABLESPACE <replaceable class="parameter">tablespace</replaceable> ]
|
||||
@ -110,6 +110,21 @@ CREATE [ UNIQUE ] INDEX <replaceable class="parameter">name</replaceable> ON <re
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><literal>CONCURRENTLY</literal></term>
|
||||
<listitem>
|
||||
<para>
|
||||
When this option is used, <productname>PostgreSQL</> will build the
|
||||
index without taking any locks that prevent concurrent inserts,
|
||||
updates, or deletes on the table; whereas a standard index build
|
||||
locks out writes (but not reads) on the table until it's done.
|
||||
There are several caveats to be aware of when using this option
|
||||
— see <xref linkend="SQL-CREATEINDEX-CONCURRENTLY"
|
||||
endterm="SQL-CREATEINDEX-CONCURRENTLY-title">.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><replaceable class="parameter">name</replaceable></term>
|
||||
<listitem>
|
||||
@ -239,6 +254,82 @@ CREATE [ UNIQUE ] INDEX <replaceable class="parameter">name</replaceable> ON <re
|
||||
</variablelist>
|
||||
|
||||
</refsect2>
|
||||
|
||||
<refsect2 id="SQL-CREATEINDEX-CONCURRENTLY">
|
||||
<title id="SQL-CREATEINDEX-CONCURRENTLY-title">Building Indexes Concurrently</title>
|
||||
|
||||
<indexterm zone="SQL-CREATEINDEX-CONCURRENTLY">
|
||||
<primary>index</primary>
|
||||
<secondary>building concurrently</secondary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
Creating an index for a large table can be a long operation. In large data
|
||||
warehousing applications it can easily take hours or even days to build
|
||||
indexes. It's important to understand the impact creating indexes has on a
|
||||
system.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Normally <productname>PostgreSQL</> locks the table to be indexed against
|
||||
writes and performs the entire index build with a single scan of the
|
||||
table. Other transactions can still read the table, but if they try to
|
||||
insert, update, or delete rows in the table they will block until the
|
||||
index build is finished.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<productname>PostgreSQL</> also supports building indexes without locking
|
||||
out writes. This method is invoked by specifying the
|
||||
<literal>CONCURRENTLY</> option of <command>CREATE INDEX</>.
|
||||
When this option is used,
|
||||
<productname>PostgreSQL</> must perform two scans of the table, and in
|
||||
addition it must wait for all existing transactions to terminate. Thus
|
||||
this method requires more total work than a standard index build and takes
|
||||
significantly longer to complete. However, since it allows normal
|
||||
operations to continue while the index is built, this method is useful for
|
||||
adding new indexes in a production environment. Of course, the extra CPU
|
||||
and I/O load imposed by the index creation may slow other operations.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
If a problem arises during the second scan of the table, such as a
|
||||
uniqueness violation in a unique index, the <command>CREATE INDEX</>
|
||||
command will fail but leave behind an <quote>invalid</> index. This index
|
||||
will be ignored for querying purposes because it may be incomplete;
|
||||
however it will still consume update overhead. The recommended recovery
|
||||
method in such cases is to drop the index and try again to perform
|
||||
<command>CREATE INDEX CONCURRENTLY</>. (Another possibility is to rebuild
|
||||
the index with <command>REINDEX</>. However, since <command>REINDEX</>
|
||||
does not support concurrent builds, this option is unlikely to seem
|
||||
attractive.)
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Another caveat when building a unique index concurrently is that the
|
||||
uniqueness constraint is already being enforced against other transactions
|
||||
when the second table scan begins. This means that constraint violations
|
||||
could be reported in other queries prior to the index becoming available
|
||||
for use, or even in cases where the index build eventually fails. Also,
|
||||
if a failure does occur in the second scan, the <quote>invalid</> index
|
||||
continues to enforce its uniqueness constraint afterwards.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Concurrent builds of expression indexes and partial indexes are supported.
|
||||
Errors occurring in the evaluation of these expressions could cause
|
||||
behavior similar to that described above for unique constraint violations.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Regular index builds permit other regular index builds on the
|
||||
same table to occur in parallel, but only one concurrent index build
|
||||
can occur on a table at a time. In both cases, no other types of schema
|
||||
modification on the table are allowed meanwhile. Another difference
|
||||
is that a regular <command>CREATE INDEX</> command can be performed within
|
||||
a transaction block, but <command>CREATE INDEX CONCURRENTLY</> cannot.
|
||||
</para>
|
||||
</refsect2>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
@ -339,15 +430,22 @@ Is this example correct?
|
||||
To create a GiST index on a point attribute so that we
|
||||
can efficiently use box operators on the result of the
|
||||
conversion function:
|
||||
</para>
|
||||
<programlisting>
|
||||
CREATE INDEX pointloc
|
||||
ON points USING GIST (point2box(location) box_ops);
|
||||
SELECT * FROM points
|
||||
WHERE point2box(points.pointloc) = boxes.box;
|
||||
</programlisting>
|
||||
</para>
|
||||
-->
|
||||
|
||||
<para>
|
||||
To create an index without locking out writes to the table:
|
||||
<programlisting>
|
||||
CREATE INDEX CONCURRENTLY sales_quantity_index ON sales_table (quantity);
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
|
@ -1,5 +1,5 @@
|
||||
<!--
|
||||
$PostgreSQL: pgsql/doc/src/sgml/ref/reindex.sgml,v 1.31 2006/07/04 18:07:24 tgl Exp $
|
||||
$PostgreSQL: pgsql/doc/src/sgml/ref/reindex.sgml,v 1.32 2006/08/25 04:06:45 tgl Exp $
|
||||
PostgreSQL documentation
|
||||
-->
|
||||
|
||||
@ -30,7 +30,7 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } <replaceable class="PARAMETER">nam
|
||||
<para>
|
||||
<command>REINDEX</command> rebuilds an index using the data
|
||||
stored in the index's table, replacing the old copy of the index. There are
|
||||
three main reasons to use <command>REINDEX</command>:
|
||||
several scenarios in which to use <command>REINDEX</command>:
|
||||
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
@ -61,6 +61,18 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } <replaceable class="PARAMETER">nam
|
||||
for an index, and wish to ensure that the change has taken full effect.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
An index build with the <literal>CONCURRENTLY</> option failed, leaving
|
||||
an <quote>invalid</> index. Such indexes are useless but it can be
|
||||
convenient to use <command>REINDEX</> to rebuild them. Note that
|
||||
<command>REINDEX</> will not perform a concurrent build. To build the
|
||||
index without interfering with production you should drop the index and
|
||||
reissue the <command>CREATE INDEX CONCURRENTLY</> command.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
Reference in New Issue
Block a user