mirror of
https://github.com/postgres/postgres.git
synced 2025-04-21 12:05:57 +03:00
Teach pageinspect about nbtree deduplication.
Add a new bt_metap() column to display the metapage's allequalimage field.

Also add three new columns to contrib/pageinspect's bt_page_items() function:

* Add a boolean column ("dead") that displays the LP_DEAD bit value for each non-pivot tuple.

* Add a TID column ("htid") that displays a single heap TID value for each tuple. This is the TID that is returned by BTreeTupleGetHeapTID(), so comparable values are shown for pivot tuples, plain non-pivot tuples, and posting list tuples.

* Add a TID array column ("tids") that displays TIDs from each tuple's posting list, if any. This works just like the "tids" column from pageinspect's gin_leafpage_items() function.

No version bump for the pageinspect extension, since there hasn't been a stable Postgres release since the last version bump (the last bump was part of commit 58b4cb30).

Author: Peter Geoghegan
Discussion: https://postgr.es/m/CAH2-WzmSMmU2eNvY9+a4MNP+z02h6sa-uxZvN3un6jY02ZVBSw@mail.gmail.com
This commit is contained in:
parent
58c47ccfff
commit
93ee38eade
@ -31,9 +31,11 @@
|
|||||||
#include "access/relation.h"
|
#include "access/relation.h"
|
||||||
#include "catalog/namespace.h"
|
#include "catalog/namespace.h"
|
||||||
#include "catalog/pg_am.h"
|
#include "catalog/pg_am.h"
|
||||||
|
#include "catalog/pg_type.h"
|
||||||
#include "funcapi.h"
|
#include "funcapi.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
#include "pageinspect.h"
|
#include "pageinspect.h"
|
||||||
|
#include "utils/array.h"
|
||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
#include "utils/rel.h"
|
#include "utils/rel.h"
|
||||||
#include "utils/varlena.h"
|
#include "utils/varlena.h"
|
||||||
@ -45,6 +47,8 @@ PG_FUNCTION_INFO_V1(bt_page_stats);
|
|||||||
|
|
||||||
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
|
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
|
||||||
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
|
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
|
||||||
|
#define DatumGetItemPointer(X) ((ItemPointer) DatumGetPointer(X))
|
||||||
|
#define ItemPointerGetDatum(X) PointerGetDatum(X)
|
||||||
|
|
||||||
/* note: BlockNumber is unsigned, hence can't be negative */
|
/* note: BlockNumber is unsigned, hence can't be negative */
|
||||||
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) { \
|
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) { \
|
||||||
@ -243,6 +247,9 @@ struct user_args
|
|||||||
{
|
{
|
||||||
Page page;
|
Page page;
|
||||||
OffsetNumber offset;
|
OffsetNumber offset;
|
||||||
|
bool leafpage;
|
||||||
|
bool rightmost;
|
||||||
|
TupleDesc tupd;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*-------------------------------------------------------
|
/*-------------------------------------------------------
|
||||||
@ -252,17 +259,25 @@ struct user_args
|
|||||||
* ------------------------------------------------------
|
* ------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
static Datum
|
static Datum
|
||||||
bt_page_print_tuples(FuncCallContext *fctx, Page page, OffsetNumber offset)
|
bt_page_print_tuples(FuncCallContext *fctx, struct user_args *uargs)
|
||||||
{
|
{
|
||||||
char *values[6];
|
Page page = uargs->page;
|
||||||
|
OffsetNumber offset = uargs->offset;
|
||||||
|
bool leafpage = uargs->leafpage;
|
||||||
|
bool rightmost = uargs->rightmost;
|
||||||
|
bool ispivottuple;
|
||||||
|
Datum values[9];
|
||||||
|
bool nulls[9];
|
||||||
HeapTuple tuple;
|
HeapTuple tuple;
|
||||||
ItemId id;
|
ItemId id;
|
||||||
IndexTuple itup;
|
IndexTuple itup;
|
||||||
int j;
|
int j;
|
||||||
int off;
|
int off;
|
||||||
int dlen;
|
int dlen;
|
||||||
char *dump;
|
char *dump,
|
||||||
|
*datacstring;
|
||||||
char *ptr;
|
char *ptr;
|
||||||
|
ItemPointer htid;
|
||||||
|
|
||||||
id = PageGetItemId(page, offset);
|
id = PageGetItemId(page, offset);
|
||||||
|
|
||||||
@ -272,18 +287,49 @@ bt_page_print_tuples(FuncCallContext *fctx, Page page, OffsetNumber offset)
|
|||||||
itup = (IndexTuple) PageGetItem(page, id);
|
itup = (IndexTuple) PageGetItem(page, id);
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
values[j++] = psprintf("%d", offset);
|
memset(nulls, 0, sizeof(nulls));
|
||||||
values[j++] = psprintf("(%u,%u)",
|
/* itemoffset column: convert the OffsetNumber *to* a Datum (not from one) */
values[j++] = Int16GetDatum(offset);
|
||||||
ItemPointerGetBlockNumberNoCheck(&itup->t_tid),
|
values[j++] = ItemPointerGetDatum(&itup->t_tid);
|
||||||
ItemPointerGetOffsetNumberNoCheck(&itup->t_tid));
|
values[j++] = Int32GetDatum((int) IndexTupleSize(itup));
|
||||||
values[j++] = psprintf("%d", (int) IndexTupleSize(itup));
|
values[j++] = BoolGetDatum(IndexTupleHasNulls(itup));
|
||||||
values[j++] = psprintf("%c", IndexTupleHasNulls(itup) ? 't' : 'f');
|
values[j++] = BoolGetDatum(IndexTupleHasVarwidths(itup));
|
||||||
values[j++] = psprintf("%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
|
|
||||||
|
|
||||||
ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
|
ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
|
||||||
dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
|
dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make sure that "data" column does not include posting list or pivot
|
||||||
|
* tuple representation of heap TID(s).
|
||||||
|
*
|
||||||
|
* Note: BTreeTupleIsPivot() won't work reliably on !heapkeyspace indexes
|
||||||
|
* (those built before BTREE_VERSION 4), but we have no way of determining
|
||||||
|
* if this page came from a !heapkeyspace index. We may only have a bytea
|
||||||
|
* nbtree page image to go on, so in general there is no metapage that we
|
||||||
|
* can check.
|
||||||
|
*
|
||||||
|
* That's okay here because BTreeTupleIsPivot() can only return false for
|
||||||
|
* a !heapkeyspace pivot, never true for a !heapkeyspace non-pivot. Since
|
||||||
|
* heap TID isn't part of the keyspace in a !heapkeyspace index anyway,
|
||||||
|
* there cannot possibly be a pivot tuple heap TID representation that we
|
||||||
|
* fail to make an adjustment for. A !heapkeyspace index can have
|
||||||
|
* BTreeTupleIsPivot() return true (due to things like suffix truncation
|
||||||
|
* for INCLUDE indexes in Postgres v11), but when that happens
|
||||||
|
* BTreeTupleGetHeapTID() can be trusted to work reliably (i.e. return
|
||||||
|
* NULL).
|
||||||
|
*
|
||||||
|
* Note: BTreeTupleIsPosting() always works reliably, even with
|
||||||
|
* !heapkeyspace indexes.
|
||||||
|
*/
|
||||||
|
if (BTreeTupleIsPosting(itup))
|
||||||
|
dlen -= IndexTupleSize(itup) - BTreeTupleGetPostingOffset(itup);
|
||||||
|
else if (BTreeTupleIsPivot(itup) && BTreeTupleGetHeapTID(itup) != NULL)
|
||||||
|
dlen -= MAXALIGN(sizeof(ItemPointerData));
|
||||||
|
|
||||||
|
if (dlen < 0 || dlen > INDEX_SIZE_MASK)
|
||||||
|
elog(ERROR, "invalid tuple length %d for tuple at offset number %u",
|
||||||
|
dlen, offset);
|
||||||
dump = palloc0(dlen * 3 + 1);
|
dump = palloc0(dlen * 3 + 1);
|
||||||
values[j] = dump;
|
datacstring = dump;
|
||||||
for (off = 0; off < dlen; off++)
|
for (off = 0; off < dlen; off++)
|
||||||
{
|
{
|
||||||
if (off > 0)
|
if (off > 0)
|
||||||
@ -291,8 +337,62 @@ bt_page_print_tuples(FuncCallContext *fctx, Page page, OffsetNumber offset)
|
|||||||
sprintf(dump, "%02x", *(ptr + off) & 0xff);
|
sprintf(dump, "%02x", *(ptr + off) & 0xff);
|
||||||
dump += 2;
|
dump += 2;
|
||||||
}
|
}
|
||||||
|
values[j++] = CStringGetTextDatum(datacstring);
|
||||||
|
pfree(datacstring);
|
||||||
|
|
||||||
tuple = BuildTupleFromCStrings(fctx->attinmeta, values);
|
/*
|
||||||
|
* We need to work around the BTreeTupleIsPivot() !heapkeyspace limitation
|
||||||
|
* again. Deduce whether or not tuple must be a pivot tuple based on
|
||||||
|
* whether or not the page is a leaf page, as well as the page offset
|
||||||
|
* number of the tuple.
|
||||||
|
*/
|
||||||
|
ispivottuple = (!leafpage || (!rightmost && offset == P_HIKEY));
|
||||||
|
|
||||||
|
/* LP_DEAD bit can never be set for pivot tuples, so show a NULL there */
|
||||||
|
if (!ispivottuple)
|
||||||
|
values[j++] = BoolGetDatum(ItemIdIsDead(id));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Assert(!ItemIdIsDead(id));
|
||||||
|
nulls[j++] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
htid = BTreeTupleGetHeapTID(itup);
|
||||||
|
if (ispivottuple && !BTreeTupleIsPivot(itup))
|
||||||
|
{
|
||||||
|
/* Don't show bogus heap TID in !heapkeyspace pivot tuple */
|
||||||
|
htid = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (htid)
|
||||||
|
values[j++] = ItemPointerGetDatum(htid);
|
||||||
|
else
|
||||||
|
nulls[j++] = true;
|
||||||
|
|
||||||
|
if (BTreeTupleIsPosting(itup))
|
||||||
|
{
|
||||||
|
/* Build an array of item pointers */
|
||||||
|
ItemPointer tids;
|
||||||
|
Datum *tids_datum;
|
||||||
|
int nposting;
|
||||||
|
|
||||||
|
tids = BTreeTupleGetPosting(itup);
|
||||||
|
nposting = BTreeTupleGetNPosting(itup);
|
||||||
|
tids_datum = (Datum *) palloc(nposting * sizeof(Datum));
|
||||||
|
for (int i = 0; i < nposting; i++)
|
||||||
|
tids_datum[i] = ItemPointerGetDatum(&tids[i]);
|
||||||
|
values[j++] = PointerGetDatum(construct_array(tids_datum,
|
||||||
|
nposting,
|
||||||
|
TIDOID,
|
||||||
|
sizeof(ItemPointerData),
|
||||||
|
false, 's'));
|
||||||
|
pfree(tids_datum);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
nulls[j++] = true;
|
||||||
|
|
||||||
|
/* Build and return the result tuple */
|
||||||
|
tuple = heap_form_tuple(uargs->tupd, values, nulls);
|
||||||
|
|
||||||
return HeapTupleGetDatum(tuple);
|
return HeapTupleGetDatum(tuple);
|
||||||
}
|
}
|
||||||
@ -378,12 +478,15 @@ bt_page_items(PG_FUNCTION_ARGS)
|
|||||||
elog(NOTICE, "page is deleted");
|
elog(NOTICE, "page is deleted");
|
||||||
|
|
||||||
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
|
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
|
||||||
|
uargs->leafpage = P_ISLEAF(opaque);
|
||||||
|
uargs->rightmost = P_RIGHTMOST(opaque);
|
||||||
|
|
||||||
/* Build a tuple descriptor for our result type */
|
/* Build a tuple descriptor for our result type */
|
||||||
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
|
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
|
||||||
elog(ERROR, "return type must be a row type");
|
elog(ERROR, "return type must be a row type");
|
||||||
|
tupleDesc = BlessTupleDesc(tupleDesc);
|
||||||
|
|
||||||
fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
|
uargs->tupd = tupleDesc;
|
||||||
|
|
||||||
fctx->user_fctx = uargs;
|
fctx->user_fctx = uargs;
|
||||||
|
|
||||||
@ -395,7 +498,7 @@ bt_page_items(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
if (fctx->call_cntr < fctx->max_calls)
|
if (fctx->call_cntr < fctx->max_calls)
|
||||||
{
|
{
|
||||||
result = bt_page_print_tuples(fctx, uargs->page, uargs->offset);
|
result = bt_page_print_tuples(fctx, uargs);
|
||||||
uargs->offset++;
|
uargs->offset++;
|
||||||
SRF_RETURN_NEXT(fctx, result);
|
SRF_RETURN_NEXT(fctx, result);
|
||||||
}
|
}
|
||||||
@ -463,12 +566,15 @@ bt_page_items_bytea(PG_FUNCTION_ARGS)
|
|||||||
elog(NOTICE, "page is deleted");
|
elog(NOTICE, "page is deleted");
|
||||||
|
|
||||||
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
|
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
|
||||||
|
uargs->leafpage = P_ISLEAF(opaque);
|
||||||
|
uargs->rightmost = P_RIGHTMOST(opaque);
|
||||||
|
|
||||||
/* Build a tuple descriptor for our result type */
|
/* Build a tuple descriptor for our result type */
|
||||||
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
|
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
|
||||||
elog(ERROR, "return type must be a row type");
|
elog(ERROR, "return type must be a row type");
|
||||||
|
tupleDesc = BlessTupleDesc(tupleDesc);
|
||||||
|
|
||||||
fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
|
uargs->tupd = tupleDesc;
|
||||||
|
|
||||||
fctx->user_fctx = uargs;
|
fctx->user_fctx = uargs;
|
||||||
|
|
||||||
@ -480,7 +586,7 @@ bt_page_items_bytea(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
if (fctx->call_cntr < fctx->max_calls)
|
if (fctx->call_cntr < fctx->max_calls)
|
||||||
{
|
{
|
||||||
result = bt_page_print_tuples(fctx, uargs->page, uargs->offset);
|
result = bt_page_print_tuples(fctx, uargs);
|
||||||
uargs->offset++;
|
uargs->offset++;
|
||||||
SRF_RETURN_NEXT(fctx, result);
|
SRF_RETURN_NEXT(fctx, result);
|
||||||
}
|
}
|
||||||
@ -510,7 +616,7 @@ bt_metap(PG_FUNCTION_ARGS)
|
|||||||
BTMetaPageData *metad;
|
BTMetaPageData *metad;
|
||||||
TupleDesc tupleDesc;
|
TupleDesc tupleDesc;
|
||||||
int j;
|
int j;
|
||||||
char *values[8];
|
char *values[9];
|
||||||
Buffer buffer;
|
Buffer buffer;
|
||||||
Page page;
|
Page page;
|
||||||
HeapTuple tuple;
|
HeapTuple tuple;
|
||||||
@ -557,17 +663,21 @@ bt_metap(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Get values of extended metadata if available, use default values
|
* Get values of extended metadata if available, use default values
|
||||||
* otherwise.
|
* otherwise. Note that we rely on the assumption that btm_allequalimage
|
||||||
|
* is initialized to zero with indexes that were built on versions prior
|
||||||
|
* to Postgres 13 (just like _bt_metaversion()).
|
||||||
*/
|
*/
|
||||||
if (metad->btm_version >= BTREE_NOVAC_VERSION)
|
if (metad->btm_version >= BTREE_NOVAC_VERSION)
|
||||||
{
|
{
|
||||||
values[j++] = psprintf("%u", metad->btm_oldest_btpo_xact);
|
values[j++] = psprintf("%u", metad->btm_oldest_btpo_xact);
|
||||||
values[j++] = psprintf("%f", metad->btm_last_cleanup_num_heap_tuples);
|
values[j++] = psprintf("%f", metad->btm_last_cleanup_num_heap_tuples);
|
||||||
|
values[j++] = metad->btm_allequalimage ? "t" : "f";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
values[j++] = "0";
|
values[j++] = "0";
|
||||||
values[j++] = "-1";
|
values[j++] = "-1";
|
||||||
|
values[j++] = "f";
|
||||||
}
|
}
|
||||||
|
|
||||||
tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
|
tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
|
||||||
|
@ -12,6 +12,7 @@ fastroot | 1
|
|||||||
fastlevel | 0
|
fastlevel | 0
|
||||||
oldest_xact | 0
|
oldest_xact | 0
|
||||||
last_cleanup_num_tuples | -1
|
last_cleanup_num_tuples | -1
|
||||||
|
allequalimage | t
|
||||||
|
|
||||||
SELECT * FROM bt_page_stats('test1_a_idx', 0);
|
SELECT * FROM bt_page_stats('test1_a_idx', 0);
|
||||||
ERROR: block 0 is a meta page
|
ERROR: block 0 is a meta page
|
||||||
@ -41,6 +42,9 @@ itemlen | 16
|
|||||||
nulls | f
|
nulls | f
|
||||||
vars | f
|
vars | f
|
||||||
data | 01 00 00 00 00 00 00 01
|
data | 01 00 00 00 00 00 00 01
|
||||||
|
dead | f
|
||||||
|
htid | (0,1)
|
||||||
|
tids |
|
||||||
|
|
||||||
SELECT * FROM bt_page_items('test1_a_idx', 2);
|
SELECT * FROM bt_page_items('test1_a_idx', 2);
|
||||||
ERROR: block number out of range
|
ERROR: block number out of range
|
||||||
@ -54,6 +58,9 @@ itemlen | 16
|
|||||||
nulls | f
|
nulls | f
|
||||||
vars | f
|
vars | f
|
||||||
data | 01 00 00 00 00 00 00 01
|
data | 01 00 00 00 00 00 00 01
|
||||||
|
dead | f
|
||||||
|
htid | (0,1)
|
||||||
|
tids |
|
||||||
|
|
||||||
SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 2));
|
SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 2));
|
||||||
ERROR: block number 2 is out of range for relation "test1_a_idx"
|
ERROR: block number 2 is out of range for relation "test1_a_idx"
|
||||||
|
@ -14,3 +14,56 @@ CREATE FUNCTION heap_tuple_infomask_flags(
|
|||||||
RETURNS record
|
RETURNS record
|
||||||
AS 'MODULE_PATHNAME', 'heap_tuple_infomask_flags'
|
AS 'MODULE_PATHNAME', 'heap_tuple_infomask_flags'
|
||||||
LANGUAGE C STRICT PARALLEL SAFE;
|
LANGUAGE C STRICT PARALLEL SAFE;
|
||||||
|
|
||||||
|
--
|
||||||
|
-- bt_metap()
|
||||||
|
--
|
||||||
|
DROP FUNCTION bt_metap(text);
|
||||||
|
CREATE FUNCTION bt_metap(IN relname text,
|
||||||
|
OUT magic int4,
|
||||||
|
OUT version int4,
|
||||||
|
OUT root int4,
|
||||||
|
OUT level int4,
|
||||||
|
OUT fastroot int4,
|
||||||
|
OUT fastlevel int4,
|
||||||
|
OUT oldest_xact int4,
|
||||||
|
OUT last_cleanup_num_tuples real,
|
||||||
|
OUT allequalimage boolean)
|
||||||
|
AS 'MODULE_PATHNAME', 'bt_metap'
|
||||||
|
LANGUAGE C STRICT PARALLEL SAFE;
|
||||||
|
|
||||||
|
--
|
||||||
|
-- bt_page_items(text, int4)
|
||||||
|
--
|
||||||
|
DROP FUNCTION bt_page_items(text, int4);
|
||||||
|
CREATE FUNCTION bt_page_items(IN relname text, IN blkno int4,
|
||||||
|
OUT itemoffset smallint,
|
||||||
|
OUT ctid tid,
|
||||||
|
OUT itemlen smallint,
|
||||||
|
OUT nulls bool,
|
||||||
|
OUT vars bool,
|
||||||
|
OUT data text,
|
||||||
|
OUT dead boolean,
|
||||||
|
OUT htid tid,
|
||||||
|
OUT tids tid[])
|
||||||
|
RETURNS SETOF record
|
||||||
|
AS 'MODULE_PATHNAME', 'bt_page_items'
|
||||||
|
LANGUAGE C STRICT PARALLEL SAFE;
|
||||||
|
|
||||||
|
--
|
||||||
|
-- bt_page_items(bytea)
|
||||||
|
--
|
||||||
|
DROP FUNCTION bt_page_items(bytea);
|
||||||
|
CREATE FUNCTION bt_page_items(IN page bytea,
|
||||||
|
OUT itemoffset smallint,
|
||||||
|
OUT ctid tid,
|
||||||
|
OUT itemlen smallint,
|
||||||
|
OUT nulls bool,
|
||||||
|
OUT vars bool,
|
||||||
|
OUT data text,
|
||||||
|
OUT dead boolean,
|
||||||
|
OUT htid tid,
|
||||||
|
OUT tids tid[])
|
||||||
|
RETURNS SETOF record
|
||||||
|
AS 'MODULE_PATHNAME', 'bt_page_items_bytea'
|
||||||
|
LANGUAGE C STRICT PARALLEL SAFE;
|
||||||
|
@ -300,13 +300,14 @@ test=# SELECT t_ctid, raw_flags, combined_flags
|
|||||||
test=# SELECT * FROM bt_metap('pg_cast_oid_index');
|
test=# SELECT * FROM bt_metap('pg_cast_oid_index');
|
||||||
-[ RECORD 1 ]-----------+-------
|
-[ RECORD 1 ]-----------+-------
|
||||||
magic | 340322
|
magic | 340322
|
||||||
version | 3
|
version | 4
|
||||||
root | 1
|
root | 1
|
||||||
level | 0
|
level | 0
|
||||||
fastroot | 1
|
fastroot | 1
|
||||||
fastlevel | 0
|
fastlevel | 0
|
||||||
oldest_xact | 582
|
oldest_xact | 582
|
||||||
last_cleanup_num_tuples | 1000
|
last_cleanup_num_tuples | 1000
|
||||||
|
allequalimage | f
|
||||||
</screen>
|
</screen>
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
@ -329,11 +330,11 @@ test=# SELECT * FROM bt_page_stats('pg_cast_oid_index', 1);
|
|||||||
-[ RECORD 1 ]-+-----
|
-[ RECORD 1 ]-+-----
|
||||||
blkno | 1
|
blkno | 1
|
||||||
type | l
|
type | l
|
||||||
live_items | 256
|
live_items | 224
|
||||||
dead_items | 0
|
dead_items | 0
|
||||||
avg_item_size | 12
|
avg_item_size | 16
|
||||||
page_size | 8192
|
page_size | 8192
|
||||||
free_size | 4056
|
free_size | 3668
|
||||||
btpo_prev | 0
|
btpo_prev | 0
|
||||||
btpo_next | 0
|
btpo_next | 0
|
||||||
btpo | 0
|
btpo | 0
|
||||||
@ -356,33 +357,75 @@ btpo_flags | 3
|
|||||||
<function>bt_page_items</function> returns detailed information about
|
<function>bt_page_items</function> returns detailed information about
|
||||||
all of the items on a B-tree index page. For example:
|
all of the items on a B-tree index page. For example:
|
||||||
<screen>
|
<screen>
|
||||||
test=# SELECT * FROM bt_page_items('pg_cast_oid_index', 1);
|
test=# SELECT itemoffset, ctid, itemlen, nulls, vars, data, dead, htid, tids[0:2] AS some_tids
|
||||||
itemoffset | ctid | itemlen | nulls | vars | data
|
FROM bt_page_items(get_raw_page('tenk2_hundred', 5));
|
||||||
------------+---------+---------+-------+------+-------------
|
itemoffset | ctid | itemlen | nulls | vars | data | dead | htid | some_tids
|
||||||
1 | (0,1) | 12 | f | f | 23 27 00 00
|
------------+-----------+---------+-------+------+-------------------------+------+--------+---------------------
|
||||||
2 | (0,2) | 12 | f | f | 24 27 00 00
|
1 | (16,1) | 16 | f | f | 30 00 00 00 00 00 00 00 | | |
|
||||||
3 | (0,3) | 12 | f | f | 25 27 00 00
|
2 | (16,8292) | 616 | f | f | 24 00 00 00 00 00 00 00 | f | (1,6) | {"(1,6)","(10,22)"}
|
||||||
4 | (0,4) | 12 | f | f | 26 27 00 00
|
3 | (16,8292) | 616 | f | f | 25 00 00 00 00 00 00 00 | f | (1,18) | {"(1,18)","(4,22)"}
|
||||||
5 | (0,5) | 12 | f | f | 27 27 00 00
|
4 | (16,8292) | 616 | f | f | 26 00 00 00 00 00 00 00 | f | (4,18) | {"(4,18)","(6,17)"}
|
||||||
6 | (0,6) | 12 | f | f | 28 27 00 00
|
5 | (16,8292) | 616 | f | f | 27 00 00 00 00 00 00 00 | f | (1,2) | {"(1,2)","(1,19)"}
|
||||||
7 | (0,7) | 12 | f | f | 29 27 00 00
|
6 | (16,8292) | 616 | f | f | 28 00 00 00 00 00 00 00 | f | (2,24) | {"(2,24)","(4,11)"}
|
||||||
8 | (0,8) | 12 | f | f | 2a 27 00 00
|
7 | (16,8292) | 616 | f | f | 29 00 00 00 00 00 00 00 | f | (2,17) | {"(2,17)","(11,2)"}
|
||||||
|
8 | (16,8292) | 616 | f | f | 2a 00 00 00 00 00 00 00 | f | (0,25) | {"(0,25)","(3,20)"}
|
||||||
|
9 | (16,8292) | 616 | f | f | 2b 00 00 00 00 00 00 00 | f | (0,10) | {"(0,10)","(0,14)"}
|
||||||
|
10 | (16,8292) | 616 | f | f | 2c 00 00 00 00 00 00 00 | f | (1,3) | {"(1,3)","(3,9)"}
|
||||||
|
11 | (16,8292) | 616 | f | f | 2d 00 00 00 00 00 00 00 | f | (6,28) | {"(6,28)","(11,1)"}
|
||||||
|
12 | (16,8292) | 616 | f | f | 2e 00 00 00 00 00 00 00 | f | (0,27) | {"(0,27)","(1,13)"}
|
||||||
|
13 | (16,8292) | 616 | f | f | 2f 00 00 00 00 00 00 00 | f | (4,17) | {"(4,17)","(4,21)"}
|
||||||
|
(13 rows)
|
||||||
</screen>
|
</screen>
|
||||||
In a B-tree leaf page, <structfield>ctid</structfield> points to a heap tuple.
|
This is a B-tree leaf page. All tuples that point to the table
|
||||||
In an internal page, the block number part of <structfield>ctid</structfield>
|
happen to be posting list tuples (all of which store a total of
|
||||||
points to another page in the index itself, while the offset part
|
100 6-byte TIDs). There is also a <quote>high key</quote> tuple
|
||||||
(the second number) is ignored and is usually 1.
|
at <literal>itemoffset</literal> number 1.
|
||||||
|
<structfield>ctid</structfield> is used to store encoded
|
||||||
|
information about each tuple in this example, though leaf page
|
||||||
|
tuples often store a heap TID directly in the
|
||||||
|
<structfield>ctid</structfield> field instead.
|
||||||
|
<structfield>tids</structfield> is the list of TIDs stored as a
|
||||||
|
posting list.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
In an internal page (not shown), the block number part of
|
||||||
|
<structfield>ctid</structfield> is a <quote>downlink</quote>,
|
||||||
|
which is a block number of another page in the index itself.
|
||||||
|
The offset part (the second number) of
|
||||||
|
<structfield>ctid</structfield> stores encoded information about
|
||||||
|
the tuple, such as the number of columns present (suffix
|
||||||
|
truncation may have removed unneeded suffix columns). Truncated
|
||||||
|
columns are treated as having the value <quote>minus
|
||||||
|
infinity</quote>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
<structfield>htid</structfield> shows a heap TID for the tuple,
|
||||||
|
regardless of the underlying tuple representation. This value
|
||||||
|
may match <structfield>ctid</structfield>, or may be decoded
|
||||||
|
from the alternative representations used by posting list tuples
|
||||||
|
and tuples from internal pages. Tuples in internal pages
|
||||||
|
usually have the implementation-level heap TID column truncated
|
||||||
|
away, which is represented as a NULL
|
||||||
|
<structfield>htid</structfield> value.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Note that the first item on any non-rightmost page (any page with
|
Note that the first item on any non-rightmost page (any page with
|
||||||
a non-zero value in the <structfield>btpo_next</structfield> field) is the
|
a non-zero value in the <structfield>btpo_next</structfield> field) is the
|
||||||
page's <quote>high key</quote>, meaning its <structfield>data</structfield>
|
page's <quote>high key</quote>, meaning its <structfield>data</structfield>
|
||||||
serves as an upper bound on all items appearing on the page, while
|
serves as an upper bound on all items appearing on the page, while
|
||||||
its <structfield>ctid</structfield> field is meaningless. Also, on non-leaf
|
its <structfield>ctid</structfield> field does not point to
|
||||||
pages, the first real data item (the first item that is not a high
|
another block. Also, on internal pages, the first real data
|
||||||
key) is a <quote>minus infinity</quote> item, with no actual value
|
item (the first item that is not a high key) reliably has every
|
||||||
in its <structfield>data</structfield> field. Such an item does have a valid
|
column truncated away, leaving no actual value in its
|
||||||
downlink in its <structfield>ctid</structfield> field, however.
|
<structfield>data</structfield> field. Such an item does have a
|
||||||
|
valid downlink in its <structfield>ctid</structfield> field,
|
||||||
|
however.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For more details about the structure of B-tree indexes, see
|
||||||
|
<xref linkend="btree-structure"/>. For more details about
|
||||||
|
deduplication and posting lists, see <xref
|
||||||
|
linkend="btree-deduplication"/>.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@ -402,17 +445,24 @@ test=# SELECT * FROM bt_page_items('pg_cast_oid_index', 1);
|
|||||||
with <function>get_raw_page</function> should be passed as argument. So
|
with <function>get_raw_page</function> should be passed as argument. So
|
||||||
the last example could also be rewritten like this:
|
the last example could also be rewritten like this:
|
||||||
<screen>
|
<screen>
|
||||||
test=# SELECT * FROM bt_page_items(get_raw_page('pg_cast_oid_index', 1));
|
test=# SELECT itemoffset, ctid, itemlen, nulls, vars, data, dead, htid, tids[0:2] AS some_tids
|
||||||
itemoffset | ctid | itemlen | nulls | vars | data
|
FROM bt_page_items(get_raw_page('tenk2_hundred', 5));
|
||||||
------------+---------+---------+-------+------+-------------
|
itemoffset | ctid | itemlen | nulls | vars | data | dead | htid | some_tids
|
||||||
1 | (0,1) | 12 | f | f | 23 27 00 00
|
------------+-----------+---------+-------+------+-------------------------+------+--------+---------------------
|
||||||
2 | (0,2) | 12 | f | f | 24 27 00 00
|
1 | (16,1) | 16 | f | f | 30 00 00 00 00 00 00 00 | | |
|
||||||
3 | (0,3) | 12 | f | f | 25 27 00 00
|
2 | (16,8292) | 616 | f | f | 24 00 00 00 00 00 00 00 | f | (1,6) | {"(1,6)","(10,22)"}
|
||||||
4 | (0,4) | 12 | f | f | 26 27 00 00
|
3 | (16,8292) | 616 | f | f | 25 00 00 00 00 00 00 00 | f | (1,18) | {"(1,18)","(4,22)"}
|
||||||
5 | (0,5) | 12 | f | f | 27 27 00 00
|
4 | (16,8292) | 616 | f | f | 26 00 00 00 00 00 00 00 | f | (4,18) | {"(4,18)","(6,17)"}
|
||||||
6 | (0,6) | 12 | f | f | 28 27 00 00
|
5 | (16,8292) | 616 | f | f | 27 00 00 00 00 00 00 00 | f | (1,2) | {"(1,2)","(1,19)"}
|
||||||
7 | (0,7) | 12 | f | f | 29 27 00 00
|
6 | (16,8292) | 616 | f | f | 28 00 00 00 00 00 00 00 | f | (2,24) | {"(2,24)","(4,11)"}
|
||||||
8 | (0,8) | 12 | f | f | 2a 27 00 00
|
7 | (16,8292) | 616 | f | f | 29 00 00 00 00 00 00 00 | f | (2,17) | {"(2,17)","(11,2)"}
|
||||||
|
8 | (16,8292) | 616 | f | f | 2a 00 00 00 00 00 00 00 | f | (0,25) | {"(0,25)","(3,20)"}
|
||||||
|
9 | (16,8292) | 616 | f | f | 2b 00 00 00 00 00 00 00 | f | (0,10) | {"(0,10)","(0,14)"}
|
||||||
|
10 | (16,8292) | 616 | f | f | 2c 00 00 00 00 00 00 00 | f | (1,3) | {"(1,3)","(3,9)"}
|
||||||
|
11 | (16,8292) | 616 | f | f | 2d 00 00 00 00 00 00 00 | f | (6,28) | {"(6,28)","(11,1)"}
|
||||||
|
12 | (16,8292) | 616 | f | f | 2e 00 00 00 00 00 00 00 | f | (0,27) | {"(0,27)","(1,13)"}
|
||||||
|
13 | (16,8292) | 616 | f | f | 2f 00 00 00 00 00 00 00 | f | (4,17) | {"(4,17)","(4,21)"}
|
||||||
|
(13 rows)
|
||||||
</screen>
|
</screen>
|
||||||
All the other details are the same as explained in the previous item.
|
All the other details are the same as explained in the previous item.
|
||||||
</para>
|
</para>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user