1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Indexes with INCLUDE columns and their support in B-tree

This patch introduces the INCLUDE clause to index definitions.  This clause
specifies a list of columns which will be included as a non-key part in
the index.  The INCLUDE columns exist solely to allow more queries to
benefit from index-only scans.  Also, such columns don't need to have
appropriate operator classes.  Expressions are not supported as INCLUDE
columns since they cannot be used in index-only scans.

Index access methods supporting INCLUDE are indicated by the amcaninclude flag
in IndexAmRoutine.  For now, only B-tree indexes support the INCLUDE clause.

In B-tree indexes, INCLUDE columns are truncated from pivot index tuples
(tuples located in non-leaf pages and high keys).  Therefore, B-tree indexes
now might have a variable number of attributes.  This patch also provides a
generic facility to support that: pivot tuples contain the number of their
attributes in t_tid.ip_posid.  The free 13th bit of t_info is used to indicate
that.  This facility will simplify further support of index suffix truncation.
The above changes are backward-compatible; pg_upgrade doesn't need special
handling of B-tree indexes for that.

Bump catalog version

Author: Anastasia Lubennikova with contributions by Alexander Korotkov and me
Reviewed by: Peter Geoghegan, Tomas Vondra, Antonin Houska, Jeff Janes,
			 David Rowley, Alexander Korotkov
Discussion: https://www.postgresql.org/message-id/flat/56168952.4010101@postgrespro.ru
This commit is contained in:
Teodor Sigaev
2018-04-07 23:00:39 +03:00
parent 01bb85169a
commit 8224de4f42
89 changed files with 2112 additions and 467 deletions

View File

@ -1,10 +1,14 @@
-- minimal test, basically just verifying that amcheck
CREATE TABLE bttest_a(id int8);
CREATE TABLE bttest_b(id int8);
CREATE TABLE bttest_multi(id int8, data int8);
INSERT INTO bttest_a SELECT * FROM generate_series(1, 100000);
INSERT INTO bttest_b SELECT * FROM generate_series(100000, 1, -1);
INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i;
CREATE INDEX bttest_a_idx ON bttest_a USING btree (id);
CREATE INDEX bttest_b_idx ON bttest_b USING btree (id);
CREATE UNIQUE INDEX bttest_multi_idx ON bttest_multi
USING btree (id) INCLUDE (data);
CREATE ROLE bttest_role;
-- verify permissions are checked (error due to function not callable)
SET ROLE bttest_role;
@ -93,8 +97,50 @@ WHERE relation = ANY(ARRAY['bttest_a', 'bttest_a_idx', 'bttest_b', 'bttest_b_idx
(0 rows)
COMMIT;
-- normal check outside of xact for index with included columns
SELECT bt_index_check('bttest_multi_idx');
bt_index_check
----------------
(1 row)
-- more expansive test for index with included columns
SELECT bt_index_parent_check('bttest_multi_idx', true);
bt_index_parent_check
-----------------------
(1 row)
SELECT bt_index_parent_check('bttest_multi_idx', true);
bt_index_parent_check
-----------------------
(1 row)
-- repeat same checks with index made by insertions
TRUNCATE bttest_multi;
INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i;
SELECT bt_index_check('bttest_multi_idx');
bt_index_check
----------------
(1 row)
SELECT bt_index_parent_check('bttest_multi_idx', true);
bt_index_parent_check
-----------------------
(1 row)
SELECT bt_index_parent_check('bttest_multi_idx', true);
bt_index_parent_check
-----------------------
(1 row)
-- cleanup
DROP TABLE bttest_a;
DROP TABLE bttest_b;
DROP TABLE bttest_multi;
DROP OWNED BY bttest_role; -- permissions
DROP ROLE bttest_role;

View File

@ -1,12 +1,16 @@
-- minimal test, basically just verifying that amcheck
CREATE TABLE bttest_a(id int8);
CREATE TABLE bttest_b(id int8);
CREATE TABLE bttest_multi(id int8, data int8);
INSERT INTO bttest_a SELECT * FROM generate_series(1, 100000);
INSERT INTO bttest_b SELECT * FROM generate_series(100000, 1, -1);
INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i;
CREATE INDEX bttest_a_idx ON bttest_a USING btree (id);
CREATE INDEX bttest_b_idx ON bttest_b USING btree (id);
CREATE UNIQUE INDEX bttest_multi_idx ON bttest_multi
USING btree (id) INCLUDE (data);
CREATE ROLE bttest_role;
@ -57,8 +61,23 @@ WHERE relation = ANY(ARRAY['bttest_a', 'bttest_a_idx', 'bttest_b', 'bttest_b_idx
AND pid = pg_backend_pid();
COMMIT;
-- normal check outside of xact for index with included columns
SELECT bt_index_check('bttest_multi_idx');
-- more expansive test for index with included columns
SELECT bt_index_parent_check('bttest_multi_idx', true);
SELECT bt_index_parent_check('bttest_multi_idx', true);
-- repeat same checks with index made by insertions
TRUNCATE bttest_multi;
INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i;
SELECT bt_index_check('bttest_multi_idx');
SELECT bt_index_parent_check('bttest_multi_idx', true);
SELECT bt_index_parent_check('bttest_multi_idx', true);
-- cleanup
DROP TABLE bttest_a;
DROP TABLE bttest_b;
DROP TABLE bttest_multi;
DROP OWNED BY bttest_role; -- permissions
DROP ROLE bttest_role;

View File

@ -617,7 +617,7 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
/* Internal page -- downlink gets leftmost on next level */
itemid = PageGetItemId(state->target, P_FIRSTDATAKEY(opaque));
itup = (IndexTuple) PageGetItem(state->target, itemid);
nextleveldown.leftmost = ItemPointerGetBlockNumber(&(itup->t_tid));
nextleveldown.leftmost = ItemPointerGetBlockNumberNoCheck(&(itup->t_tid));
nextleveldown.level = opaque->btpo.level - 1;
}
else
@ -722,6 +722,39 @@ bt_target_page_check(BtreeCheckState *state)
elog(DEBUG2, "verifying %u items on %s block %u", max,
P_ISLEAF(topaque) ? "leaf" : "internal", state->targetblock);
/* Check the number of attributes in high key if any */
if (!P_RIGHTMOST(topaque))
{
if (!_bt_check_natts(state->rel, state->target, P_HIKEY))
{
ItemId itemid;
IndexTuple itup;
char *itid,
*htid;
itemid = PageGetItemId(state->target, P_HIKEY);
itup = (IndexTuple) PageGetItem(state->target, itemid);
itid = psprintf("(%u,%u)", state->targetblock, P_HIKEY);
htid = psprintf("(%u,%u)",
ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)),
ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)));
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("wrong number of index tuple attributes for index \"%s\"",
RelationGetRelationName(state->rel)),
errdetail_internal("Index tid=%s natts=%u points to %s tid=%s page lsn=%X/%X.",
itid,
BTreeTupGetNAtts(itup, state->rel),
P_ISLEAF(topaque) ? "heap" : "index",
htid,
(uint32) (state->targetlsn >> 32),
(uint32) state->targetlsn)));
}
}
/*
* Loop over page items, starting from first non-highkey item, not high
* key (if any). Also, immediately skip "negative infinity" real item (if
@ -760,6 +793,30 @@ bt_target_page_check(BtreeCheckState *state)
(uint32) state->targetlsn),
errhint("This could be a torn page problem")));
/* Check the number of index tuple attributes */
if (!_bt_check_natts(state->rel, state->target, offset))
{
char *itid,
*htid;
itid = psprintf("(%u,%u)", state->targetblock, offset);
htid = psprintf("(%u,%u)",
ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)),
ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)));
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("wrong number of index tuple attributes for index \"%s\"",
RelationGetRelationName(state->rel)),
errdetail_internal("Index tid=%s natts=%u points to %s tid=%s page lsn=%X/%X.",
itid,
BTreeTupGetNAtts(itup, state->rel),
P_ISLEAF(topaque) ? "heap" : "index",
htid,
(uint32) (state->targetlsn >> 32),
(uint32) state->targetlsn)));
}
/*
* Don't try to generate scankey using "negative infinity" garbage
* data on internal pages
@ -802,8 +859,8 @@ bt_target_page_check(BtreeCheckState *state)
itid = psprintf("(%u,%u)", state->targetblock, offset);
htid = psprintf("(%u,%u)",
ItemPointerGetBlockNumber(&(itup->t_tid)),
ItemPointerGetOffsetNumber(&(itup->t_tid)));
ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)),
ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)));
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
@ -834,8 +891,8 @@ bt_target_page_check(BtreeCheckState *state)
itid = psprintf("(%u,%u)", state->targetblock, offset);
htid = psprintf("(%u,%u)",
ItemPointerGetBlockNumber(&(itup->t_tid)),
ItemPointerGetOffsetNumber(&(itup->t_tid)));
ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)),
ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)));
nitid = psprintf("(%u,%u)", state->targetblock,
OffsetNumberNext(offset));
@ -843,8 +900,8 @@ bt_target_page_check(BtreeCheckState *state)
itemid = PageGetItemId(state->target, OffsetNumberNext(offset));
itup = (IndexTuple) PageGetItem(state->target, itemid);
nhtid = psprintf("(%u,%u)",
ItemPointerGetBlockNumber(&(itup->t_tid)),
ItemPointerGetOffsetNumber(&(itup->t_tid)));
ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)),
ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)));
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
@ -932,7 +989,7 @@ bt_target_page_check(BtreeCheckState *state)
*/
if (!P_ISLEAF(topaque) && state->readonly)
{
BlockNumber childblock = ItemPointerGetBlockNumber(&(itup->t_tid));
BlockNumber childblock = ItemPointerGetBlockNumberNoCheck(&(itup->t_tid));
bt_downlink_check(state, childblock, skey);
}
@ -1326,6 +1383,11 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
* or otherwise varied when or how compression was applied, our assumption
* would break, leading to false positive reports of corruption. For now,
* we don't decompress/normalize toasted values as part of fingerprinting.
*
* In the future, non-pivot index tuples might make use of
* BT_N_KEYS_OFFSET_MASK. Then the binary representation of an index tuple
* linked to a particular heap tuple might vary and need to be normalized
* before bloom filter lookup.
*/
itup = index_form_tuple(RelationGetDescr(index), values, isnull);
itup->t_tid = htup->t_self;
@ -1336,8 +1398,8 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
ItemPointerGetBlockNumber(&(itup->t_tid)),
ItemPointerGetOffsetNumber(&(itup->t_tid)),
ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)),
ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)),
RelationGetRelationName(state->heaprel),
RelationGetRelationName(state->rel)),
!state->readonly
@ -1368,6 +1430,10 @@ offset_is_negative_infinity(BTPageOpaque opaque, OffsetNumber offset)
* infinity item is either first or second line item, or there is none
* within page.
*
* "Negative infinity" tuple is a special corner case of pivot tuples,
* it has zero attributes while the rest of the pivot tuples have nkeyatts
* attributes.
*
* Right-most pages don't have a high key, but could be said to
* conceptually have a "positive infinity" high key. Thus, there is a
* symmetry between down link items in parent pages, and high keys in
@ -1391,10 +1457,10 @@ static inline bool
invariant_leq_offset(BtreeCheckState *state, ScanKey key,
OffsetNumber upperbound)
{
int16 natts = state->rel->rd_rel->relnatts;
int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
int32 cmp;
cmp = _bt_compare(state->rel, natts, key, state->target, upperbound);
cmp = _bt_compare(state->rel, nkeyatts, key, state->target, upperbound);
return cmp <= 0;
}
@ -1410,10 +1476,10 @@ static inline bool
invariant_geq_offset(BtreeCheckState *state, ScanKey key,
OffsetNumber lowerbound)
{
int16 natts = state->rel->rd_rel->relnatts;
int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
int32 cmp;
cmp = _bt_compare(state->rel, natts, key, state->target, lowerbound);
cmp = _bt_compare(state->rel, nkeyatts, key, state->target, lowerbound);
return cmp >= 0;
}
@ -1433,10 +1499,10 @@ invariant_leq_nontarget_offset(BtreeCheckState *state,
Page nontarget, ScanKey key,
OffsetNumber upperbound)
{
int16 natts = state->rel->rd_rel->relnatts;
int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
int32 cmp;
cmp = _bt_compare(state->rel, natts, key, nontarget, upperbound);
cmp = _bt_compare(state->rel, nkeyatts, key, nontarget, upperbound);
return cmp <= 0;
}