mirror of
https://github.com/postgres/postgres.git
synced 2025-11-16 15:02:33 +03:00
Adjust btree index build to not use shared buffers, thereby avoiding the
locking conflict against concurrent CHECKPOINT that was discussed a few weeks ago. Also, if WAL archiving is not in use (which is always true at the moment, but won't be if point-in-time recovery (PITR) makes it into this release), there's no need to WAL-log the index build process; it's sufficient to force-fsync the completed index before commit. This seems to give about a factor-of-2 speedup in my tests, which is consistent with writing half as much data. I did not try it with WAL on a separate drive, though --- the gain would probably be a lot smaller in that scenario.
This commit is contained in:
@@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.75 2004/04/21 18:24:25 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.76 2004/06/02 17:28:17 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Postgres btree pages look like ordinary relation pages. The opaque
|
||||
@@ -31,8 +31,9 @@
|
||||
/*
|
||||
* _bt_metapinit() -- Initialize the metadata page of a new btree.
|
||||
*
|
||||
* If markvalid is true, the index is immediately marked valid, else it
|
||||
* will be invalid until _bt_metaproot() is called.
|
||||
* Note: this is actually not used for standard btree index building;
|
||||
* nbtsort.c prefers not to make the metadata page valid until completion
|
||||
* of build.
|
||||
*
|
||||
* Note: there's no real need for any locking here. Since the transaction
|
||||
* creating the index hasn't committed yet, no one else can even see the index
|
||||
@@ -40,12 +41,11 @@
|
||||
* not true, but we assume the caller holds sufficient locks on the index.)
|
||||
*/
|
||||
void
|
||||
_bt_metapinit(Relation rel, bool markvalid)
|
||||
_bt_metapinit(Relation rel)
|
||||
{
|
||||
Buffer buf;
|
||||
Page pg;
|
||||
BTMetaPageData *metad;
|
||||
BTPageOpaque op;
|
||||
|
||||
if (RelationGetNumberOfBlocks(rel) != 0)
|
||||
elog(ERROR, "cannot initialize non-empty btree index \"%s\"",
|
||||
@@ -55,22 +55,12 @@ _bt_metapinit(Relation rel, bool markvalid)
|
||||
Assert(BufferGetBlockNumber(buf) == BTREE_METAPAGE);
|
||||
pg = BufferGetPage(buf);
|
||||
|
||||
_bt_initmetapage(pg, P_NONE, 0);
|
||||
metad = BTPageGetMeta(pg);
|
||||
|
||||
/* NO ELOG(ERROR) from here till newmeta op is logged */
|
||||
START_CRIT_SECTION();
|
||||
|
||||
_bt_pageinit(pg, BufferGetPageSize(buf));
|
||||
|
||||
metad = BTPageGetMeta(pg);
|
||||
metad->btm_magic = markvalid ? BTREE_MAGIC : 0;
|
||||
metad->btm_version = BTREE_VERSION;
|
||||
metad->btm_root = P_NONE;
|
||||
metad->btm_level = 0;
|
||||
metad->btm_fastroot = P_NONE;
|
||||
metad->btm_fastlevel = 0;
|
||||
|
||||
op = (BTPageOpaque) PageGetSpecialPointer(pg);
|
||||
op->btpo_flags = BTP_META;
|
||||
|
||||
/* XLOG stuff */
|
||||
if (!rel->rd_istemp)
|
||||
{
|
||||
@@ -90,7 +80,7 @@ _bt_metapinit(Relation rel, bool markvalid)
|
||||
rdata[0].next = NULL;
|
||||
|
||||
recptr = XLogInsert(RM_BTREE_ID,
|
||||
markvalid ? XLOG_BTREE_NEWMETA : XLOG_BTREE_INVALIDMETA,
|
||||
XLOG_BTREE_NEWMETA,
|
||||
rdata);
|
||||
|
||||
PageSetLSN(pg, recptr);
|
||||
@@ -102,6 +92,29 @@ _bt_metapinit(Relation rel, bool markvalid)
|
||||
WriteBuffer(buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* _bt_initmetapage() -- Fill a page buffer with a correct metapage image
|
||||
*/
|
||||
void
|
||||
_bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level)
|
||||
{
|
||||
BTMetaPageData *metad;
|
||||
BTPageOpaque metaopaque;
|
||||
|
||||
_bt_pageinit(page, BLCKSZ);
|
||||
|
||||
metad = BTPageGetMeta(page);
|
||||
metad->btm_magic = BTREE_MAGIC;
|
||||
metad->btm_version = BTREE_VERSION;
|
||||
metad->btm_root = rootbknum;
|
||||
metad->btm_level = level;
|
||||
metad->btm_fastroot = rootbknum;
|
||||
metad->btm_fastlevel = level;
|
||||
|
||||
metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
metaopaque->btpo_flags = BTP_META;
|
||||
}
|
||||
|
||||
/*
|
||||
* _bt_getroot() -- Get the root page of the btree.
|
||||
*
|
||||
@@ -609,76 +622,6 @@ _bt_page_recyclable(Page page)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* _bt_metaproot() -- Change the root page of the btree.
|
||||
*
|
||||
* Lehman and Yao require that the root page move around in order to
|
||||
* guarantee deadlock-free short-term, fine-granularity locking. When
|
||||
* we split the root page, we record the new parent in the metadata page
|
||||
* for the relation. This routine does the work.
|
||||
*
|
||||
* No direct preconditions, but if you don't have the write lock on
|
||||
* at least the old root page when you call this, you're making a big
|
||||
* mistake. On exit, metapage data is correct and we no longer have
|
||||
* a pin or lock on the metapage.
|
||||
*
|
||||
* Actually this is not used for splitting on-the-fly anymore. It's only used
|
||||
* in nbtsort.c at the completion of btree building, where we know we have
|
||||
* sole access to the index anyway.
|
||||
*/
|
||||
void
|
||||
_bt_metaproot(Relation rel, BlockNumber rootbknum, uint32 level)
|
||||
{
|
||||
Buffer metabuf;
|
||||
Page metap;
|
||||
BTPageOpaque metaopaque;
|
||||
BTMetaPageData *metad;
|
||||
|
||||
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
|
||||
metap = BufferGetPage(metabuf);
|
||||
metaopaque = (BTPageOpaque) PageGetSpecialPointer(metap);
|
||||
Assert(metaopaque->btpo_flags & BTP_META);
|
||||
|
||||
/* NO ELOG(ERROR) from here till newmeta op is logged */
|
||||
START_CRIT_SECTION();
|
||||
|
||||
metad = BTPageGetMeta(metap);
|
||||
Assert(metad->btm_magic == BTREE_MAGIC || metad->btm_magic == 0);
|
||||
metad->btm_magic = BTREE_MAGIC; /* it's valid now for sure */
|
||||
metad->btm_root = rootbknum;
|
||||
metad->btm_level = level;
|
||||
metad->btm_fastroot = rootbknum;
|
||||
metad->btm_fastlevel = level;
|
||||
|
||||
/* XLOG stuff */
|
||||
if (!rel->rd_istemp)
|
||||
{
|
||||
xl_btree_newmeta xlrec;
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[1];
|
||||
|
||||
xlrec.node = rel->rd_node;
|
||||
xlrec.meta.root = metad->btm_root;
|
||||
xlrec.meta.level = metad->btm_level;
|
||||
xlrec.meta.fastroot = metad->btm_fastroot;
|
||||
xlrec.meta.fastlevel = metad->btm_fastlevel;
|
||||
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = SizeOfBtreeNewmeta;
|
||||
rdata[0].next = NULL;
|
||||
|
||||
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWMETA, rdata);
|
||||
|
||||
PageSetLSN(metap, recptr);
|
||||
PageSetSUI(metap, ThisStartUpID);
|
||||
}
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
_bt_wrtbuf(rel, metabuf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Delete item(s) from a btree page.
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user