1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-18 12:22:09 +03:00

Get rid of the post-recovery cleanup step of GIN page splits.

Replace it with an approach similar to what GiST uses: when a page is split,
the left sibling is marked with a flag indicating that the parent hasn't been
updated yet. When the parent is updated, the flag is cleared. If an insertion
steps on a page with the flag set, it will finish split before proceeding
with the insertion.

The post-recovery cleanup mechanism was never totally reliable, as insertion
to the parent could fail e.g because of running out of memory or disk space,
leaving the tree in an inconsistent state.

This also divides the responsibility of WAL-logging more clearly between
the generic ginbtree.c code, and the parts specific to entry and posting
trees. There is now a common WAL record format for insertions and deletions,
which is written by ginbtree.c, followed by tree-specific payload, which is
returned by the placetopage- and split- callbacks.
This commit is contained in:
Heikki Linnakangas
2013-11-27 19:21:23 +02:00
parent ce5326eed3
commit 631118fe1e
9 changed files with 677 additions and 567 deletions

View File

@@ -41,20 +41,45 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
desc_node(buf, ((ginxlogCreatePostingTree *) rec)->node, ((ginxlogCreatePostingTree *) rec)->blkno);
break;
case XLOG_GIN_INSERT:
appendStringInfoString(buf, "Insert item, ");
desc_node(buf, ((ginxlogInsert *) rec)->node, ((ginxlogInsert *) rec)->blkno);
appendStringInfo(buf, " offset: %u nitem: %u isdata: %c isleaf %c isdelete %c updateBlkno:%u",
((ginxlogInsert *) rec)->offset,
((ginxlogInsert *) rec)->nitem,
(((ginxlogInsert *) rec)->isData) ? 'T' : 'F',
(((ginxlogInsert *) rec)->isLeaf) ? 'T' : 'F',
(((ginxlogInsert *) rec)->isDelete) ? 'T' : 'F',
((ginxlogInsert *) rec)->updateBlkno);
{
ginxlogInsert *xlrec = (ginxlogInsert *) rec;
char *payload = rec + sizeof(ginxlogInsert);
appendStringInfoString(buf, "Insert item, ");
desc_node(buf, xlrec->node, xlrec->blkno);
appendStringInfo(buf, " offset: %u isdata: %c isleaf: %c",
xlrec->offset,
(xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
if (!(xlrec->flags & GIN_INSERT_ISLEAF))
{
BlockNumber leftChildBlkno;
BlockNumber rightChildBlkno;
memcpy(&leftChildBlkno, payload, sizeof(BlockNumber));
payload += sizeof(BlockNumber);
memcpy(&rightChildBlkno, payload, sizeof(BlockNumber));
payload += sizeof(BlockNumber);
appendStringInfo(buf, " children: %u/%u",
leftChildBlkno, rightChildBlkno);
}
if (!(xlrec->flags & GIN_INSERT_ISDATA))
appendStringInfo(buf, " isdelete: %c",
(((ginxlogInsertEntry *) payload)->isDelete) ? 'T' : 'F');
else if (xlrec->flags & GIN_INSERT_ISLEAF)
appendStringInfo(buf, " nitem: %u",
(((ginxlogInsertDataLeaf *) payload)->nitem) ? 'T' : 'F');
else
appendStringInfo(buf, " pitem: %u-%u/%u",
PostingItemGetBlockNumber((PostingItem *) payload),
ItemPointerGetBlockNumber(&((PostingItem *) payload)->key),
ItemPointerGetOffsetNumber(&((PostingItem *) payload)->key));
}
break;
case XLOG_GIN_SPLIT:
appendStringInfoString(buf, "Page split, ");
desc_node(buf, ((ginxlogSplit *) rec)->node, ((ginxlogSplit *) rec)->lblkno);
appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->isRootSplit) ? 'T' : 'F');
appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
break;
case XLOG_GIN_VACUUM_PAGE:
appendStringInfoString(buf, "Vacuum page, ");