mirror of
https://github.com/postgres/postgres.git
synced 2025-04-21 12:05:57 +03:00
Avoid using potentially-under-aligned page buffers.
There's a project policy against using plain "char buf[BLCKSZ]" local or static variables as page buffers; preferred style is to palloc or malloc each buffer to ensure it is MAXALIGN'd. However, that policy's been ignored in an increasing number of places. We've apparently got away with it so far, probably because (a) relatively few people use platforms on which misalignment causes core dumps and/or (b) the variables chance to be sufficiently aligned anyway. But this is not something to rely on. Moreover, even if we don't get a core dump, we might be paying a lot of cycles for misaligned accesses.

To fix, invent new union types PGAlignedBlock and PGAlignedXLogBlock that the compiler must allocate with sufficient alignment, and use those in place of plain char arrays.

I used these types even for variables where there's no risk of a misaligned access, since ensuring proper alignment should make kernel data transfers faster. I also changed some places where we had been palloc'ing short-lived buffers, for coding style uniformity and to save palloc/pfree overhead.

Since this seems to be a live portability hazard (despite the lack of field reports), back-patch to all supported versions.

Patch by me; thanks to Michael Paquier for review.

Discussion: https://postgr.es/m/1535618100.1286.3.camel@credativ.de
This commit is contained in:
parent
20cd88857b
commit
083d9ced14
@ -37,7 +37,7 @@ typedef enum
|
|||||||
PREWARM_BUFFER
|
PREWARM_BUFFER
|
||||||
} PrewarmType;
|
} PrewarmType;
|
||||||
|
|
||||||
static char blockbuffer[BLCKSZ];
|
static PGAlignedBlock blockbuffer;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* pg_prewarm(regclass, mode text, fork text,
|
* pg_prewarm(regclass, mode text, fork text,
|
||||||
@ -179,7 +179,7 @@ pg_prewarm(PG_FUNCTION_ARGS)
|
|||||||
for (block = first_block; block <= last_block; ++block)
|
for (block = first_block; block <= last_block; ++block)
|
||||||
{
|
{
|
||||||
CHECK_FOR_INTERRUPTS();
|
CHECK_FOR_INTERRUPTS();
|
||||||
smgrread(rel->rd_smgr, forkNumber, block, blockbuffer);
|
smgrread(rel->rd_smgr, forkNumber, block, blockbuffer.data);
|
||||||
++blocks_done;
|
++blocks_done;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -633,7 +633,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
|
|||||||
|
|
||||||
/* these must be static so they can be returned to caller */
|
/* these must be static so they can be returned to caller */
|
||||||
static ginxlogSplitEntry data;
|
static ginxlogSplitEntry data;
|
||||||
static char tupstore[2 * BLCKSZ];
|
static PGAlignedBlock tupstore[2];
|
||||||
|
|
||||||
entryPreparePage(btree, lpage, off, insertData, updateblkno);
|
entryPreparePage(btree, lpage, off, insertData, updateblkno);
|
||||||
|
|
||||||
@ -642,7 +642,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
|
|||||||
* one after another in a temporary workspace.
|
* one after another in a temporary workspace.
|
||||||
*/
|
*/
|
||||||
maxoff = PageGetMaxOffsetNumber(lpage);
|
maxoff = PageGetMaxOffsetNumber(lpage);
|
||||||
ptr = tupstore;
|
ptr = tupstore[0].data;
|
||||||
for (i = FirstOffsetNumber; i <= maxoff; i++)
|
for (i = FirstOffsetNumber; i <= maxoff; i++)
|
||||||
{
|
{
|
||||||
if (i == off)
|
if (i == off)
|
||||||
@ -667,7 +667,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
|
|||||||
ptr += size;
|
ptr += size;
|
||||||
totalsize += size + sizeof(ItemIdData);
|
totalsize += size + sizeof(ItemIdData);
|
||||||
}
|
}
|
||||||
tupstoresize = ptr - tupstore;
|
tupstoresize = ptr - tupstore[0].data;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize the left and right pages, and copy all the tuples back to
|
* Initialize the left and right pages, and copy all the tuples back to
|
||||||
@ -676,7 +676,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
|
|||||||
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
|
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
|
||||||
GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
|
GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
|
||||||
|
|
||||||
ptr = tupstore;
|
ptr = tupstore[0].data;
|
||||||
maxoff++;
|
maxoff++;
|
||||||
lsize = 0;
|
lsize = 0;
|
||||||
|
|
||||||
@ -715,7 +715,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
|
|||||||
rdata[0].next = &rdata[1];
|
rdata[0].next = &rdata[1];
|
||||||
|
|
||||||
rdata[1].buffer = InvalidBuffer;
|
rdata[1].buffer = InvalidBuffer;
|
||||||
rdata[1].data = tupstore;
|
rdata[1].data = tupstore[0].data;
|
||||||
rdata[1].len = tupstoresize;
|
rdata[1].len = tupstoresize;
|
||||||
rdata[1].next = NULL;
|
rdata[1].next = NULL;
|
||||||
|
|
||||||
|
@ -52,18 +52,15 @@ writeListPage(Relation index, Buffer buffer,
|
|||||||
size = 0;
|
size = 0;
|
||||||
OffsetNumber l,
|
OffsetNumber l,
|
||||||
off;
|
off;
|
||||||
char *workspace;
|
PGAlignedBlock workspace;
|
||||||
char *ptr;
|
char *ptr;
|
||||||
|
|
||||||
/* workspace could be a local array; we use palloc for alignment */
|
|
||||||
workspace = palloc(BLCKSZ);
|
|
||||||
|
|
||||||
START_CRIT_SECTION();
|
START_CRIT_SECTION();
|
||||||
|
|
||||||
GinInitBuffer(buffer, GIN_LIST);
|
GinInitBuffer(buffer, GIN_LIST);
|
||||||
|
|
||||||
off = FirstOffsetNumber;
|
off = FirstOffsetNumber;
|
||||||
ptr = workspace;
|
ptr = workspace.data;
|
||||||
|
|
||||||
for (i = 0; i < ntuples; i++)
|
for (i = 0; i < ntuples; i++)
|
||||||
{
|
{
|
||||||
@ -120,7 +117,7 @@ writeListPage(Relation index, Buffer buffer,
|
|||||||
rdata[0].next = rdata + 1;
|
rdata[0].next = rdata + 1;
|
||||||
|
|
||||||
rdata[1].buffer = InvalidBuffer;
|
rdata[1].buffer = InvalidBuffer;
|
||||||
rdata[1].data = workspace;
|
rdata[1].data = workspace.data;
|
||||||
rdata[1].len = size;
|
rdata[1].len = size;
|
||||||
rdata[1].next = NULL;
|
rdata[1].next = NULL;
|
||||||
|
|
||||||
@ -135,8 +132,6 @@ writeListPage(Relation index, Buffer buffer,
|
|||||||
|
|
||||||
END_CRIT_SECTION();
|
END_CRIT_SECTION();
|
||||||
|
|
||||||
pfree(workspace);
|
|
||||||
|
|
||||||
return freesize;
|
return freesize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -710,7 +710,7 @@ static bool
|
|||||||
_hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
|
_hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
|
||||||
{
|
{
|
||||||
BlockNumber lastblock;
|
BlockNumber lastblock;
|
||||||
char zerobuf[BLCKSZ];
|
PGAlignedBlock zerobuf;
|
||||||
|
|
||||||
lastblock = firstblock + nblocks - 1;
|
lastblock = firstblock + nblocks - 1;
|
||||||
|
|
||||||
@ -721,10 +721,10 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
|
|||||||
if (lastblock < firstblock || lastblock == InvalidBlockNumber)
|
if (lastblock < firstblock || lastblock == InvalidBlockNumber)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
MemSet(zerobuf, 0, sizeof(zerobuf));
|
MemSet(zerobuf.data, 0, sizeof(zerobuf));
|
||||||
|
|
||||||
RelationOpenSmgr(rel);
|
RelationOpenSmgr(rel);
|
||||||
smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf, false);
|
smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf.data, false);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -2318,7 +2318,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
|
|||||||
HeapTuple *heaptuples;
|
HeapTuple *heaptuples;
|
||||||
int i;
|
int i;
|
||||||
int ndone;
|
int ndone;
|
||||||
char *scratch = NULL;
|
PGAlignedBlock scratch;
|
||||||
Page page;
|
Page page;
|
||||||
bool needwal;
|
bool needwal;
|
||||||
Size saveFreeSpace;
|
Size saveFreeSpace;
|
||||||
@ -2335,14 +2335,6 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
|
|||||||
heaptuples[i] = heap_prepare_insert(relation, tuples[i],
|
heaptuples[i] = heap_prepare_insert(relation, tuples[i],
|
||||||
xid, cid, options);
|
xid, cid, options);
|
||||||
|
|
||||||
/*
|
|
||||||
* Allocate some memory to use for constructing the WAL record. Using
|
|
||||||
* palloc() within a critical section is not safe, so we allocate this
|
|
||||||
* beforehand.
|
|
||||||
*/
|
|
||||||
if (needwal)
|
|
||||||
scratch = palloc(BLCKSZ);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We're about to do the actual inserts -- but check for conflict first,
|
* We're about to do the actual inserts -- but check for conflict first,
|
||||||
* to minimize the possibility of having to roll back work we've just
|
* to minimize the possibility of having to roll back work we've just
|
||||||
@ -2427,7 +2419,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
|
|||||||
uint8 info = XLOG_HEAP2_MULTI_INSERT;
|
uint8 info = XLOG_HEAP2_MULTI_INSERT;
|
||||||
char *tupledata;
|
char *tupledata;
|
||||||
int totaldatalen;
|
int totaldatalen;
|
||||||
char *scratchptr = scratch;
|
char *scratchptr = scratch.data;
|
||||||
bool init;
|
bool init;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2494,10 +2486,10 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
|
|||||||
log_heap_new_cid(relation, heaptup);
|
log_heap_new_cid(relation, heaptup);
|
||||||
}
|
}
|
||||||
totaldatalen = scratchptr - tupledata;
|
totaldatalen = scratchptr - tupledata;
|
||||||
Assert((scratchptr - scratch) < BLCKSZ);
|
Assert((scratchptr - scratch.data) < BLCKSZ);
|
||||||
|
|
||||||
rdata[0].data = (char *) xlrec;
|
rdata[0].data = (char *) xlrec;
|
||||||
rdata[0].len = tupledata - scratch;
|
rdata[0].len = tupledata - scratch.data;
|
||||||
rdata[0].buffer = InvalidBuffer;
|
rdata[0].buffer = InvalidBuffer;
|
||||||
rdata[0].next = &rdata[1];
|
rdata[0].next = &rdata[1];
|
||||||
|
|
||||||
|
@ -610,10 +610,9 @@ static void
|
|||||||
vm_extend(Relation rel, BlockNumber vm_nblocks)
|
vm_extend(Relation rel, BlockNumber vm_nblocks)
|
||||||
{
|
{
|
||||||
BlockNumber vm_nblocks_now;
|
BlockNumber vm_nblocks_now;
|
||||||
Page pg;
|
PGAlignedBlock pg;
|
||||||
|
|
||||||
pg = (Page) palloc(BLCKSZ);
|
PageInit((Page) pg.data, BLCKSZ, 0);
|
||||||
PageInit(pg, BLCKSZ, 0);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We use the relation extension lock to lock out other backends trying to
|
* We use the relation extension lock to lock out other backends trying to
|
||||||
@ -644,10 +643,10 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
|
|||||||
/* Now extend the file */
|
/* Now extend the file */
|
||||||
while (vm_nblocks_now < vm_nblocks)
|
while (vm_nblocks_now < vm_nblocks)
|
||||||
{
|
{
|
||||||
PageSetChecksumInplace(pg, vm_nblocks_now);
|
PageSetChecksumInplace((Page) pg.data, vm_nblocks_now);
|
||||||
|
|
||||||
smgrextend(rel->rd_smgr, VISIBILITYMAP_FORKNUM, vm_nblocks_now,
|
smgrextend(rel->rd_smgr, VISIBILITYMAP_FORKNUM, vm_nblocks_now,
|
||||||
(char *) pg, false);
|
pg.data, false);
|
||||||
vm_nblocks_now++;
|
vm_nblocks_now++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -664,6 +663,4 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
|
|||||||
rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now;
|
rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now;
|
||||||
|
|
||||||
UnlockRelationForExtension(rel, ExclusiveLock);
|
UnlockRelationForExtension(rel, ExclusiveLock);
|
||||||
|
|
||||||
pfree(pg);
|
|
||||||
}
|
}
|
||||||
|
@ -3173,8 +3173,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
|
|||||||
{
|
{
|
||||||
char path[MAXPGPATH];
|
char path[MAXPGPATH];
|
||||||
char tmppath[MAXPGPATH];
|
char tmppath[MAXPGPATH];
|
||||||
char zbuffer_raw[XLOG_BLCKSZ + MAXIMUM_ALIGNOF];
|
PGAlignedXLogBlock zbuffer;
|
||||||
char *zbuffer;
|
|
||||||
XLogSegNo installed_segno;
|
XLogSegNo installed_segno;
|
||||||
int max_advance;
|
int max_advance;
|
||||||
int fd;
|
int fd;
|
||||||
@ -3228,16 +3227,12 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
|
|||||||
* fsync below) that all the indirect blocks are down on disk. Therefore,
|
* fsync below) that all the indirect blocks are down on disk. Therefore,
|
||||||
* fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
|
* fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
|
||||||
* log file.
|
* log file.
|
||||||
*
|
|
||||||
* Note: ensure the buffer is reasonably well-aligned; this may save a few
|
|
||||||
* cycles transferring data to the kernel.
|
|
||||||
*/
|
*/
|
||||||
zbuffer = (char *) MAXALIGN(zbuffer_raw);
|
memset(zbuffer.data, 0, XLOG_BLCKSZ);
|
||||||
memset(zbuffer, 0, XLOG_BLCKSZ);
|
|
||||||
for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
|
for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
|
||||||
{
|
{
|
||||||
errno = 0;
|
errno = 0;
|
||||||
if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
|
if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
|
||||||
{
|
{
|
||||||
int save_errno = errno;
|
int save_errno = errno;
|
||||||
|
|
||||||
@ -3328,7 +3323,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno)
|
|||||||
{
|
{
|
||||||
char path[MAXPGPATH];
|
char path[MAXPGPATH];
|
||||||
char tmppath[MAXPGPATH];
|
char tmppath[MAXPGPATH];
|
||||||
char buffer[XLOG_BLCKSZ];
|
PGAlignedXLogBlock buffer;
|
||||||
int srcfd;
|
int srcfd;
|
||||||
int fd;
|
int fd;
|
||||||
int nbytes;
|
int nbytes;
|
||||||
@ -3364,7 +3359,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno)
|
|||||||
for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(buffer))
|
for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(buffer))
|
||||||
{
|
{
|
||||||
errno = 0;
|
errno = 0;
|
||||||
if ((int) read(srcfd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
|
if ((int) read(srcfd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
|
||||||
{
|
{
|
||||||
if (errno != 0)
|
if (errno != 0)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
@ -3375,7 +3370,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno)
|
|||||||
(errmsg("not enough data in file \"%s\"", path)));
|
(errmsg("not enough data in file \"%s\"", path)));
|
||||||
}
|
}
|
||||||
errno = 0;
|
errno = 0;
|
||||||
if ((int) write(fd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
|
if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
|
||||||
{
|
{
|
||||||
int save_errno = errno;
|
int save_errno = errno;
|
||||||
|
|
||||||
@ -9200,7 +9195,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
|
|||||||
*/
|
*/
|
||||||
if (XLogCheckBuffer(rdata, false, &lsn, &bkpb))
|
if (XLogCheckBuffer(rdata, false, &lsn, &bkpb))
|
||||||
{
|
{
|
||||||
char copied_buffer[BLCKSZ];
|
PGAlignedBlock copied_buffer;
|
||||||
char *origdata = (char *) BufferGetBlock(buffer);
|
char *origdata = (char *) BufferGetBlock(buffer);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -9212,8 +9207,8 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
|
|||||||
* and hole_offset to 0; so the following code is safe for either
|
* and hole_offset to 0; so the following code is safe for either
|
||||||
* case.
|
* case.
|
||||||
*/
|
*/
|
||||||
memcpy(copied_buffer, origdata, bkpb.hole_offset);
|
memcpy(copied_buffer.data, origdata, bkpb.hole_offset);
|
||||||
memcpy(copied_buffer + bkpb.hole_offset,
|
memcpy(copied_buffer.data + bkpb.hole_offset,
|
||||||
origdata + bkpb.hole_offset + bkpb.hole_length,
|
origdata + bkpb.hole_offset + bkpb.hole_length,
|
||||||
BLCKSZ - bkpb.hole_offset - bkpb.hole_length);
|
BLCKSZ - bkpb.hole_offset - bkpb.hole_length);
|
||||||
|
|
||||||
@ -9228,7 +9223,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
|
|||||||
/*
|
/*
|
||||||
* Save copy of the buffer.
|
* Save copy of the buffer.
|
||||||
*/
|
*/
|
||||||
rdata[1].data = copied_buffer;
|
rdata[1].data = copied_buffer.data;
|
||||||
rdata[1].len = BLCKSZ - bkpb.hole_length;
|
rdata[1].len = BLCKSZ - bkpb.hole_length;
|
||||||
rdata[1].buffer = InvalidBuffer;
|
rdata[1].buffer = InvalidBuffer;
|
||||||
rdata[1].next = NULL;
|
rdata[1].next = NULL;
|
||||||
|
@ -9486,21 +9486,14 @@ static void
|
|||||||
copy_relation_data(SMgrRelation src, SMgrRelation dst,
|
copy_relation_data(SMgrRelation src, SMgrRelation dst,
|
||||||
ForkNumber forkNum, char relpersistence)
|
ForkNumber forkNum, char relpersistence)
|
||||||
{
|
{
|
||||||
char *buf;
|
PGAlignedBlock buf;
|
||||||
Page page;
|
Page page;
|
||||||
bool use_wal;
|
bool use_wal;
|
||||||
bool copying_initfork;
|
bool copying_initfork;
|
||||||
BlockNumber nblocks;
|
BlockNumber nblocks;
|
||||||
BlockNumber blkno;
|
BlockNumber blkno;
|
||||||
|
|
||||||
/*
|
page = (Page) buf.data;
|
||||||
* palloc the buffer so that it's MAXALIGN'd. If it were just a local
|
|
||||||
* char[] array, the compiler might align it on any byte boundary, which
|
|
||||||
* can seriously hurt transfer speed to and from the kernel; not to
|
|
||||||
* mention possibly making log_newpage's accesses to the page header fail.
|
|
||||||
*/
|
|
||||||
buf = (char *) palloc(BLCKSZ);
|
|
||||||
page = (Page) buf;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The init fork for an unlogged relation in many respects has to be
|
* The init fork for an unlogged relation in many respects has to be
|
||||||
@ -9524,7 +9517,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
|
|||||||
/* If we got a cancel signal during the copy of the data, quit */
|
/* If we got a cancel signal during the copy of the data, quit */
|
||||||
CHECK_FOR_INTERRUPTS();
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
|
||||||
smgrread(src, forkNum, blkno, buf);
|
smgrread(src, forkNum, blkno, buf.data);
|
||||||
|
|
||||||
if (!PageIsVerified(page, blkno))
|
if (!PageIsVerified(page, blkno))
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
@ -9550,11 +9543,9 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
|
|||||||
* rel, because there's no need for smgr to schedule an fsync for this
|
* rel, because there's no need for smgr to schedule an fsync for this
|
||||||
* write; we'll do it ourselves below.
|
* write; we'll do it ourselves below.
|
||||||
*/
|
*/
|
||||||
smgrextend(dst, forkNum, blkno, buf, true);
|
smgrextend(dst, forkNum, blkno, buf.data, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
pfree(buf);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the rel is WAL-logged, must fsync before commit. We use heap_sync
|
* If the rel is WAL-logged, must fsync before commit. We use heap_sync
|
||||||
* to ensure that the toast table gets fsync'd too. (For a temp or
|
* to ensure that the toast table gets fsync'd too. (For a temp or
|
||||||
|
@ -494,16 +494,16 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
|
|||||||
bytesleft = histfilelen;
|
bytesleft = histfilelen;
|
||||||
while (bytesleft > 0)
|
while (bytesleft > 0)
|
||||||
{
|
{
|
||||||
char rbuf[BLCKSZ];
|
PGAlignedBlock rbuf;
|
||||||
int nread;
|
int nread;
|
||||||
|
|
||||||
nread = read(fd, rbuf, sizeof(rbuf));
|
nread = read(fd, rbuf.data, sizeof(rbuf));
|
||||||
if (nread <= 0)
|
if (nread <= 0)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode_for_file_access(),
|
(errcode_for_file_access(),
|
||||||
errmsg("could not read file \"%s\": %m",
|
errmsg("could not read file \"%s\": %m",
|
||||||
path)));
|
path)));
|
||||||
pq_sendbytes(&buf, rbuf, nread);
|
pq_sendbytes(&buf, rbuf.data, nread);
|
||||||
bytesleft -= nread;
|
bytesleft -= nread;
|
||||||
}
|
}
|
||||||
CloseTransientFile(fd);
|
CloseTransientFile(fd);
|
||||||
|
@ -86,7 +86,7 @@ struct BufFile
|
|||||||
off_t curOffset; /* offset part of current pos */
|
off_t curOffset; /* offset part of current pos */
|
||||||
int pos; /* next read/write position in buffer */
|
int pos; /* next read/write position in buffer */
|
||||||
int nbytes; /* total # of valid bytes in buffer */
|
int nbytes; /* total # of valid bytes in buffer */
|
||||||
char buffer[BLCKSZ];
|
PGAlignedBlock buffer;
|
||||||
};
|
};
|
||||||
|
|
||||||
static BufFile *makeBufFile(File firstfile);
|
static BufFile *makeBufFile(File firstfile);
|
||||||
@ -254,7 +254,7 @@ BufFileLoadBuffer(BufFile *file)
|
|||||||
/*
|
/*
|
||||||
* Read whatever we can get, up to a full bufferload.
|
* Read whatever we can get, up to a full bufferload.
|
||||||
*/
|
*/
|
||||||
file->nbytes = FileRead(thisfile, file->buffer, sizeof(file->buffer));
|
file->nbytes = FileRead(thisfile, file->buffer.data, sizeof(file->buffer));
|
||||||
if (file->nbytes < 0)
|
if (file->nbytes < 0)
|
||||||
file->nbytes = 0;
|
file->nbytes = 0;
|
||||||
file->offsets[file->curFile] += file->nbytes;
|
file->offsets[file->curFile] += file->nbytes;
|
||||||
@ -317,7 +317,7 @@ BufFileDumpBuffer(BufFile *file)
|
|||||||
return; /* seek failed, give up */
|
return; /* seek failed, give up */
|
||||||
file->offsets[file->curFile] = file->curOffset;
|
file->offsets[file->curFile] = file->curOffset;
|
||||||
}
|
}
|
||||||
bytestowrite = FileWrite(thisfile, file->buffer + wpos, bytestowrite);
|
bytestowrite = FileWrite(thisfile, file->buffer.data + wpos, bytestowrite);
|
||||||
if (bytestowrite <= 0)
|
if (bytestowrite <= 0)
|
||||||
return; /* failed to write */
|
return; /* failed to write */
|
||||||
file->offsets[file->curFile] += bytestowrite;
|
file->offsets[file->curFile] += bytestowrite;
|
||||||
@ -385,7 +385,7 @@ BufFileRead(BufFile *file, void *ptr, size_t size)
|
|||||||
nthistime = size;
|
nthistime = size;
|
||||||
Assert(nthistime > 0);
|
Assert(nthistime > 0);
|
||||||
|
|
||||||
memcpy(ptr, file->buffer + file->pos, nthistime);
|
memcpy(ptr, file->buffer.data + file->pos, nthistime);
|
||||||
|
|
||||||
file->pos += nthistime;
|
file->pos += nthistime;
|
||||||
ptr = (void *) ((char *) ptr + nthistime);
|
ptr = (void *) ((char *) ptr + nthistime);
|
||||||
@ -432,7 +432,7 @@ BufFileWrite(BufFile *file, void *ptr, size_t size)
|
|||||||
nthistime = size;
|
nthistime = size;
|
||||||
Assert(nthistime > 0);
|
Assert(nthistime > 0);
|
||||||
|
|
||||||
memcpy(file->buffer + file->pos, ptr, nthistime);
|
memcpy(file->buffer.data + file->pos, ptr, nthistime);
|
||||||
|
|
||||||
file->dirty = true;
|
file->dirty = true;
|
||||||
file->pos += nthistime;
|
file->pos += nthistime;
|
||||||
|
@ -588,10 +588,9 @@ static void
|
|||||||
fsm_extend(Relation rel, BlockNumber fsm_nblocks)
|
fsm_extend(Relation rel, BlockNumber fsm_nblocks)
|
||||||
{
|
{
|
||||||
BlockNumber fsm_nblocks_now;
|
BlockNumber fsm_nblocks_now;
|
||||||
Page pg;
|
PGAlignedBlock pg;
|
||||||
|
|
||||||
pg = (Page) palloc(BLCKSZ);
|
PageInit((Page) pg.data, BLCKSZ, 0);
|
||||||
PageInit(pg, BLCKSZ, 0);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We use the relation extension lock to lock out other backends trying to
|
* We use the relation extension lock to lock out other backends trying to
|
||||||
@ -621,10 +620,10 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks)
|
|||||||
|
|
||||||
while (fsm_nblocks_now < fsm_nblocks)
|
while (fsm_nblocks_now < fsm_nblocks)
|
||||||
{
|
{
|
||||||
PageSetChecksumInplace(pg, fsm_nblocks_now);
|
PageSetChecksumInplace((Page) pg.data, fsm_nblocks_now);
|
||||||
|
|
||||||
smgrextend(rel->rd_smgr, FSM_FORKNUM, fsm_nblocks_now,
|
smgrextend(rel->rd_smgr, FSM_FORKNUM, fsm_nblocks_now,
|
||||||
(char *) pg, false);
|
pg.data, false);
|
||||||
fsm_nblocks_now++;
|
fsm_nblocks_now++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -632,8 +631,6 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks)
|
|||||||
rel->rd_smgr->smgr_fsm_nblocks = fsm_nblocks_now;
|
rel->rd_smgr->smgr_fsm_nblocks = fsm_nblocks_now;
|
||||||
|
|
||||||
UnlockRelationForExtension(rel, ExclusiveLock);
|
UnlockRelationForExtension(rel, ExclusiveLock);
|
||||||
|
|
||||||
pfree(pg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -88,7 +88,7 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir,
|
|||||||
int f;
|
int f;
|
||||||
char fn[MAXPGPATH];
|
char fn[MAXPGPATH];
|
||||||
struct stat statbuf;
|
struct stat statbuf;
|
||||||
char *zerobuf;
|
PGAlignedXLogBlock zerobuf;
|
||||||
int bytes;
|
int bytes;
|
||||||
XLogSegNo segno;
|
XLogSegNo segno;
|
||||||
|
|
||||||
@ -134,11 +134,11 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* New, empty, file. So pad it to 16Mb with zeroes */
|
/* New, empty, file. So pad it to 16Mb with zeroes */
|
||||||
zerobuf = pg_malloc0(XLOG_BLCKSZ);
|
memset(zerobuf.data, 0, XLOG_BLCKSZ);
|
||||||
for (bytes = 0; bytes < XLogSegSize; bytes += XLOG_BLCKSZ)
|
for (bytes = 0; bytes < XLogSegSize; bytes += XLOG_BLCKSZ)
|
||||||
{
|
{
|
||||||
errno = 0;
|
errno = 0;
|
||||||
if (write(f, zerobuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
if (write(f, zerobuf.data, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||||
{
|
{
|
||||||
/* if write didn't set errno, assume problem is no disk space */
|
/* if write didn't set errno, assume problem is no disk space */
|
||||||
if (errno == 0)
|
if (errno == 0)
|
||||||
@ -146,13 +146,11 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir,
|
|||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
_("%s: could not pad transaction log file \"%s\": %s\n"),
|
_("%s: could not pad transaction log file \"%s\": %s\n"),
|
||||||
progname, fn, strerror(errno));
|
progname, fn, strerror(errno));
|
||||||
free(zerobuf);
|
|
||||||
close(f);
|
close(f);
|
||||||
unlink(fn);
|
unlink(fn);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(zerobuf);
|
|
||||||
|
|
||||||
if (lseek(f, SEEK_SET, 0) != 0)
|
if (lseek(f, SEEK_SET, 0) != 0)
|
||||||
{
|
{
|
||||||
|
@ -1060,7 +1060,7 @@ KillExistingArchiveStatus(void)
|
|||||||
static void
|
static void
|
||||||
WriteEmptyXLOG(void)
|
WriteEmptyXLOG(void)
|
||||||
{
|
{
|
||||||
char *buffer;
|
PGAlignedXLogBlock buffer;
|
||||||
XLogPageHeader page;
|
XLogPageHeader page;
|
||||||
XLogLongPageHeader longpage;
|
XLogLongPageHeader longpage;
|
||||||
XLogRecord *record;
|
XLogRecord *record;
|
||||||
@ -1069,12 +1069,10 @@ WriteEmptyXLOG(void)
|
|||||||
int fd;
|
int fd;
|
||||||
int nbytes;
|
int nbytes;
|
||||||
|
|
||||||
/* Use malloc() to ensure buffer is MAXALIGNED */
|
memset(buffer.data, 0, XLOG_BLCKSZ);
|
||||||
buffer = (char *) pg_malloc(XLOG_BLCKSZ);
|
|
||||||
page = (XLogPageHeader) buffer;
|
|
||||||
memset(buffer, 0, XLOG_BLCKSZ);
|
|
||||||
|
|
||||||
/* Set up the XLOG page header */
|
/* Set up the XLOG page header */
|
||||||
|
page = (XLogPageHeader) buffer.data;
|
||||||
page->xlp_magic = XLOG_PAGE_MAGIC;
|
page->xlp_magic = XLOG_PAGE_MAGIC;
|
||||||
page->xlp_info = XLP_LONG_HEADER;
|
page->xlp_info = XLP_LONG_HEADER;
|
||||||
page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
|
page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
|
||||||
@ -1116,7 +1114,7 @@ WriteEmptyXLOG(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
errno = 0;
|
errno = 0;
|
||||||
if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
if (write(fd, buffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||||
{
|
{
|
||||||
/* if write didn't set errno, assume problem is no disk space */
|
/* if write didn't set errno, assume problem is no disk space */
|
||||||
if (errno == 0)
|
if (errno == 0)
|
||||||
@ -1127,11 +1125,11 @@ WriteEmptyXLOG(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Fill the rest of the file with zeroes */
|
/* Fill the rest of the file with zeroes */
|
||||||
memset(buffer, 0, XLOG_BLCKSZ);
|
memset(buffer.data, 0, XLOG_BLCKSZ);
|
||||||
for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
|
for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
|
||||||
{
|
{
|
||||||
errno = 0;
|
errno = 0;
|
||||||
if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
if (write(fd, buffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||||
{
|
{
|
||||||
if (errno == 0)
|
if (errno == 0)
|
||||||
errno = ENOSPC;
|
errno = ENOSPC;
|
||||||
|
@ -867,6 +867,32 @@ typedef NameData *Name;
|
|||||||
* ----------------------------------------------------------------
|
* ----------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use this, not "char buf[BLCKSZ]", to declare a field or local variable
|
||||||
|
* holding a page buffer, if that page might be accessed as a page and not
|
||||||
|
* just a string of bytes. Otherwise the variable might be under-aligned,
|
||||||
|
* causing problems on alignment-picky hardware. (In some places, we use
|
||||||
|
* this to declare buffers even though we only pass them to read() and
|
||||||
|
* write(), because copying to/from aligned buffers is usually faster than
|
||||||
|
* using unaligned buffers.) We include both "double" and "int64" in the
|
||||||
|
* union to ensure that the compiler knows the value must be MAXALIGN'ed
|
||||||
|
* (cf. configure's computation of MAXIMUM_ALIGNOF).
|
||||||
|
*/
|
||||||
|
typedef union PGAlignedBlock
|
||||||
|
{
|
||||||
|
char data[BLCKSZ];
|
||||||
|
double force_align_d;
|
||||||
|
int64 force_align_i64;
|
||||||
|
} PGAlignedBlock;
|
||||||
|
|
||||||
|
/* Same, but for an XLOG_BLCKSZ-sized buffer */
|
||||||
|
typedef union PGAlignedXLogBlock
|
||||||
|
{
|
||||||
|
char data[XLOG_BLCKSZ];
|
||||||
|
double force_align_d;
|
||||||
|
int64 force_align_i64;
|
||||||
|
} PGAlignedXLogBlock;
|
||||||
|
|
||||||
/* msb for char */
|
/* msb for char */
|
||||||
#define HIGHBIT (0x80)
|
#define HIGHBIT (0x80)
|
||||||
#define IS_HIGHBIT_SET(ch) ((unsigned char)(ch) & HIGHBIT)
|
#define IS_HIGHBIT_SET(ch) ((unsigned char)(ch) & HIGHBIT)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user