1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-24 01:29:19 +03:00
Files
postgres/contrib/pg_resetxlog/pg_resetxlog.c
Tom Lane 1173344e74 Adjust WAL code so that checkpoints truncate the xlog at the previous
checkpoint's redo pointer, not its undo pointer, per discussion in
pghackers a few days ago.  No point in hanging onto undo information
until we have the ability to do something with it --- and this solves
a rather large problem with log space for long-running transactions.
Also, change all calls of write() to detect the case where write
returned a count less than requested, but failed to set errno.
Presume that this situation indicates ENOSPC, and give the appropriate
error message, rather than a random message associated with the previous
value of errno.
2001-06-06 17:07:46 +00:00

1027 lines
28 KiB
C

/*-------------------------------------------------------------------------
*
* pg_resetxlog.c
* A utility to "zero out" the xlog when it's corrupt beyond recovery.
* Can also rebuild pg_control if needed.
*
* The theory of operation is fairly simple:
* 1. Read the existing pg_control (which will include the last
* checkpoint record). If it is an old format then update to
* current format.
* 2. If pg_control is corrupt, attempt to intuit reasonable values,
* by scanning the old xlog if necessary.
* 3. Modify pg_control to reflect a "shutdown" state with a checkpoint
* record at the start of xlog.
* 4. Flush the existing xlog files and write a new segment with
* just a checkpoint record in it. The new segment is positioned
* just past the end of the old xlog, so that existing LSNs in
* data pages will appear to be "in the past".
* This is all pretty straightforward except for the intuition part of
* step 2 ...
*
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Header: /cvsroot/pgsql/contrib/pg_resetxlog/Attic/pg_resetxlog.c,v 1.5 2001/06/06 17:07:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <errno.h>
#include <unistd.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <dirent.h>
#ifdef USE_LOCALE
#include <locale.h>
#endif
#include "access/xlog.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
/******************** stuff copied from xlog.c ********************/
/* Increment an xlogid/segment pair */
#define NextLogSeg(logId, logSeg) \
do { \
if ((logSeg) >= XLogSegsPerFile-1) \
{ \
(logId)++; \
(logSeg) = 0; \
} \
else \
(logSeg)++; \
} while (0)
/*
* Compute ID and segment from an XLogRecPtr.
*
* For XLByteToSeg, do the computation at face value. For XLByteToPrevSeg,
* a boundary byte is taken to be in the previous segment. This is suitable
* for deciding which segment to write given a pointer to a record end,
* for example.
*/
#define XLByteToSeg(xlrp, logId, logSeg) \
( logId = (xlrp).xlogid, \
logSeg = (xlrp).xrecoff / XLogSegSize \
)
#define XLByteToPrevSeg(xlrp, logId, logSeg) \
( logId = (xlrp).xlogid, \
logSeg = ((xlrp).xrecoff - 1) / XLogSegSize \
)
/*
* Is an XLogRecPtr within a particular XLOG segment?
*
* For XLByteInSeg, do the computation at face value. For XLByteInPrevSeg,
* a boundary byte is taken to be in the previous segment.
*/
#define XLByteInSeg(xlrp, logId, logSeg) \
((xlrp).xlogid == (logId) && \
(xlrp).xrecoff / XLogSegSize == (logSeg))
#define XLByteInPrevSeg(xlrp, logId, logSeg) \
((xlrp).xlogid == (logId) && \
((xlrp).xrecoff - 1) / XLogSegSize == (logSeg))
#define XLogFileName(path, log, seg) \
snprintf(path, MAXPGPATH, "%s/%08X%08X", \
XLogDir, log, seg)
/*
* _INTL_MAXLOGRECSZ: max space needed for a record including header and
* any backup-block data.
*/
#define _INTL_MAXLOGRECSZ (SizeOfXLogRecord + MAXLOGRECSZ + \
XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
/******************** end of stuff copied from xlog.c ********************/
static char *DataDir; /* locations of important stuff */
static char XLogDir[MAXPGPATH];
static char ControlFilePath[MAXPGPATH];
static ControlFileData ControlFile; /* pg_control values */
static uint32 newXlogId,
newXlogSeg; /* ID/Segment of new XLOG segment */
static bool guessed = false; /* T if we had to guess at any values */
static bool CheckControlVersion0(char *buffer, int len);
static int
XLogFileOpen(uint32 log, uint32 seg)
{
char path[MAXPGPATH];
int fd;
XLogFileName(path, log, seg);
fd = open(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
return (fd);
}
/*
* Try to read the existing pg_control file.
*
* This routine is also responsible for updating old pg_control versions
* to the current format.
*/
static bool
ReadControlFile(void)
{
int fd;
int len;
char *buffer;
crc64 crc;
if ((fd = open(ControlFilePath, O_RDONLY)) < 0)
{
/*
* If pg_control is not there at all, or we can't read it, the
* odds are we've been handed a bad DataDir path, so give up. User
* can do "touch pg_control" to force us to proceed.
*/
perror("Failed to open $PGDATA/global/pg_control for reading");
if (errno == ENOENT)
fprintf(stderr, "If you're sure the PGDATA path is correct, do\n"
" touch %s\n"
"and try again.\n", ControlFilePath);
exit(1);
}
/* Use malloc to ensure we have a maxaligned buffer */
buffer = (char *) malloc(BLCKSZ);
len = read(fd, buffer, BLCKSZ);
if (len < 0)
{
perror("Failed to read $PGDATA/global/pg_control");
exit(1);
}
close(fd);
if (len >= sizeof(ControlFileData) &&
((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
{
/* Seems to be current version --- check the CRC. */
INIT_CRC64(crc);
COMP_CRC64(crc,
buffer + sizeof(crc64),
sizeof(ControlFileData) - sizeof(crc64));
FIN_CRC64(crc);
if (EQ_CRC64(crc, ((ControlFileData *) buffer)->crc))
{
/* Valid data... */
memcpy(&ControlFile, buffer, sizeof(ControlFile));
return true;
}
fprintf(stderr, "pg_control exists but has invalid CRC; proceed with caution.\n");
/* We will use the data anyway, but treat it as guessed. */
memcpy(&ControlFile, buffer, sizeof(ControlFile));
guessed = true;
return true;
}
/*
* Maybe it's a 7.1beta pg_control.
*/
if (CheckControlVersion0(buffer, len))
return true;
/* Looks like it's a mess. */
fprintf(stderr, "pg_control exists but is broken or unknown version; ignoring it.\n");
return false;
}
/******************* routines for old XLOG format *******************/
/*
* This format was in use in 7.1 beta releases through 7.1beta5. The
* pg_control layout was different, and so were the XLOG page headers.
* The XLOG record header format was physically the same as 7.1 release,
* but interpretation of the xl_len field was not.
*/
typedef struct crc64V0
{
uint32 crc1;
uint32 crc2;
} crc64V0;
static uint32 crc_tableV0[] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};
#define INIT_CRC64V0(crc) ((crc).crc1 = 0xffffffff, (crc).crc2 = 0xffffffff)
#define FIN_CRC64V0(crc) ((crc).crc1 ^= 0xffffffff, (crc).crc2 ^= 0xffffffff)
#define COMP_CRC64V0(crc, data, len) \
{\
uint32 __c1 = (crc).crc1;\
uint32 __c2 = (crc).crc2;\
char *__data = (char *) (data);\
uint32 __len = (len);\
\
while (__len >= 2)\
{\
__c1 = crc_tableV0[(__c1 ^ *__data++) & 0xff] ^ (__c1 >> 8);\
__c2 = crc_tableV0[(__c2 ^ *__data++) & 0xff] ^ (__c2 >> 8);\
__len -= 2;\
}\
if (__len > 0)\
__c1 = crc_tableV0[(__c1 ^ *__data++) & 0xff] ^ (__c1 >> 8);\
(crc).crc1 = __c1;\
(crc).crc2 = __c2;\
}
#define EQ_CRC64V0(c1,c2) ((c1).crc1 == (c2).crc1 && (c1).crc2 == (c2).crc2)
#define LOCALE_NAME_BUFLEN_V0 128
typedef struct ControlFileDataV0
{
crc64V0 crc;
uint32 logId; /* current log file id */
uint32 logSeg; /* current log file segment (1-based) */
XLogRecPtr checkPoint; /* last check point record ptr */
time_t time; /* time stamp of last modification */
DBState state; /* see enum above */
uint32 blcksz; /* block size for this DB */
uint32 relseg_size; /* blocks per segment of large relation */
uint32 catalog_version_no; /* internal version number */
char lc_collate[LOCALE_NAME_BUFLEN_V0];
char lc_ctype[LOCALE_NAME_BUFLEN_V0];
char archdir[MAXPGPATH]; /* where to move offline log files */
} ControlFileDataV0;
typedef struct CheckPointV0
{
XLogRecPtr redo; /* next RecPtr available when we */
/* began to create CheckPoint */
/* (i.e. REDO start point) */
XLogRecPtr undo; /* first record of oldest in-progress */
/* transaction when we started */
/* (i.e. UNDO end point) */
StartUpID ThisStartUpID;
TransactionId nextXid;
Oid nextOid;
bool Shutdown;
} CheckPointV0;
typedef struct XLogRecordV0
{
crc64V0 xl_crc;
XLogRecPtr xl_prev; /* ptr to previous record in log */
XLogRecPtr xl_xact_prev; /* ptr to previous record of this xact */
TransactionId xl_xid; /* xact id */
uint16 xl_len; /* total len of record *data* */
uint8 xl_info;
RmgrId xl_rmid; /* resource manager inserted this record */
} XLogRecordV0;
#define SizeOfXLogRecordV0 DOUBLEALIGN(sizeof(XLogRecordV0))
typedef struct XLogContRecordV0
{
uint16 xl_len; /* len of data left */
} XLogContRecordV0;
#define SizeOfXLogContRecordV0 DOUBLEALIGN(sizeof(XLogContRecordV0))
#define XLOG_PAGE_MAGIC_V0 0x17345168
typedef struct XLogPageHeaderDataV0
{
uint32 xlp_magic;
uint16 xlp_info;
} XLogPageHeaderDataV0;
#define SizeOfXLogPHDV0 DOUBLEALIGN(sizeof(XLogPageHeaderDataV0))
typedef XLogPageHeaderDataV0 *XLogPageHeaderV0;
static bool RecordIsValidV0(XLogRecordV0 * record);
static XLogRecordV0 *ReadRecordV0(XLogRecPtr *RecPtr, char *buffer);
static bool ValidXLOGHeaderV0(XLogPageHeaderV0 hdr);
/*
* Try to interpret pg_control contents as "version 0" format.
*/
static bool
CheckControlVersion0(char *buffer, int len)
{
crc64V0 crc;
ControlFileDataV0 *oldfile;
XLogRecordV0 *record;
CheckPointV0 *oldchkpt;
if (len < sizeof(ControlFileDataV0))
return false;
/* Check CRC the version-0 way. */
INIT_CRC64V0(crc);
COMP_CRC64V0(crc,
buffer + sizeof(crc64V0),
sizeof(ControlFileDataV0) - sizeof(crc64V0));
FIN_CRC64V0(crc);
if (!EQ_CRC64V0(crc, ((ControlFileDataV0 *) buffer)->crc))
return false;
/* Valid data, convert useful fields to new-style pg_control format */
oldfile = (ControlFileDataV0 *) buffer;
memset(&ControlFile, 0, sizeof(ControlFile));
ControlFile.pg_control_version = PG_CONTROL_VERSION;
ControlFile.catalog_version_no = oldfile->catalog_version_no;
ControlFile.state = oldfile->state;
ControlFile.logId = oldfile->logId;
ControlFile.logSeg = oldfile->logSeg;
ControlFile.blcksz = oldfile->blcksz;
ControlFile.relseg_size = oldfile->relseg_size;
strcpy(ControlFile.lc_collate, oldfile->lc_collate);
strcpy(ControlFile.lc_ctype, oldfile->lc_ctype);
/*
* Since this format did not include a copy of the latest checkpoint
* record, we have to go rooting in the old XLOG to get that.
*/
record = ReadRecordV0(&oldfile->checkPoint,
(char *) malloc(_INTL_MAXLOGRECSZ));
if (record == NULL)
{
/*
* We have to guess at the checkpoint contents.
*/
guessed = true;
ControlFile.checkPointCopy.ThisStartUpID = 0;
ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData;
return true;
}
oldchkpt = (CheckPointV0 *) XLogRecGetData(record);
ControlFile.checkPointCopy.ThisStartUpID = oldchkpt->ThisStartUpID;
ControlFile.checkPointCopy.nextXid = oldchkpt->nextXid;
ControlFile.checkPointCopy.nextOid = oldchkpt->nextOid;
return true;
}
/*
* CRC-check an XLOG V0 record. We do not believe the contents of an XLOG
* record (other than to the minimal extent of computing the amount of
* data to read in) until we've checked the CRCs.
*
* We assume all of the record has been read into memory at *record.
*/
static bool
RecordIsValidV0(XLogRecordV0 * record)
{
crc64V0 crc;
uint32 len = record->xl_len;
/*
* NB: this code is not right for V0 records containing backup blocks,
* but for now it's only going to be applied to checkpoint records, so
* I'm not going to worry about it...
*/
INIT_CRC64V0(crc);
COMP_CRC64V0(crc, XLogRecGetData(record), len);
COMP_CRC64V0(crc, (char *) record + sizeof(crc64V0),
SizeOfXLogRecordV0 - sizeof(crc64V0));
FIN_CRC64V0(crc);
if (!EQ_CRC64V0(record->xl_crc, crc))
return false;
return (true);
}
/*
* Attempt to read an XLOG V0 record at recptr.
*
* If no valid record is available, returns NULL.
*
* buffer is a workspace at least _INTL_MAXLOGRECSZ bytes long. It is needed
* to reassemble a record that crosses block boundaries. Note that on
* successful return, the returned record pointer always points at buffer.
*/
static XLogRecordV0 *
ReadRecordV0(XLogRecPtr *RecPtr, char *buffer)
{
static int readFile = -1;
static uint32 readId = 0;
static uint32 readSeg = 0;
static uint32 readOff = 0;
static char *readBuf = NULL;
XLogRecordV0 *record;
uint32 len,
total_len;
uint32 targetPageOff;
if (readBuf == NULL)
readBuf = (char *) malloc(BLCKSZ);
XLByteToSeg(*RecPtr, readId, readSeg);
if (readFile < 0)
{
readFile = XLogFileOpen(readId, readSeg);
if (readFile < 0)
goto next_record_is_invalid;
readOff = (uint32) (-1);/* force read to occur below */
}
targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / BLCKSZ) * BLCKSZ;
if (readOff != targetPageOff)
{
readOff = targetPageOff;
if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
goto next_record_is_invalid;
if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
goto next_record_is_invalid;
if (!ValidXLOGHeaderV0((XLogPageHeaderV0) readBuf))
goto next_record_is_invalid;
}
if ((((XLogPageHeaderV0) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
RecPtr->xrecoff % BLCKSZ == SizeOfXLogPHDV0)
goto next_record_is_invalid;
record = (XLogRecordV0 *) ((char *) readBuf + RecPtr->xrecoff % BLCKSZ);
if (record->xl_len == 0)
goto next_record_is_invalid;
/*
* Compute total length of record including any appended backup
* blocks.
*/
total_len = SizeOfXLogRecordV0 + record->xl_len;
/*
* Make sure it will fit in buffer (currently, it is mechanically
* impossible for this test to fail, but it seems like a good idea
* anyway).
*/
if (total_len > _INTL_MAXLOGRECSZ)
goto next_record_is_invalid;
len = BLCKSZ - RecPtr->xrecoff % BLCKSZ;
if (total_len > len)
{
/* Need to reassemble record */
XLogContRecordV0 *contrecord;
uint32 gotlen = len;
memcpy(buffer, record, len);
record = (XLogRecordV0 *) buffer;
buffer += len;
for (;;)
{
readOff += BLCKSZ;
if (readOff >= XLogSegSize)
{
close(readFile);
readFile = -1;
NextLogSeg(readId, readSeg);
readFile = XLogFileOpen(readId, readSeg);
if (readFile < 0)
goto next_record_is_invalid;
readOff = 0;
}
if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
goto next_record_is_invalid;
if (!ValidXLOGHeaderV0((XLogPageHeaderV0) readBuf))
goto next_record_is_invalid;
if (!(((XLogPageHeaderV0) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD))
goto next_record_is_invalid;
contrecord = (XLogContRecordV0 *) ((char *) readBuf + SizeOfXLogPHDV0);
if (contrecord->xl_len == 0 ||
total_len != (contrecord->xl_len + gotlen))
goto next_record_is_invalid;
len = BLCKSZ - SizeOfXLogPHDV0 - SizeOfXLogContRecordV0;
if (contrecord->xl_len > len)
{
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecordV0, len);
gotlen += len;
buffer += len;
continue;
}
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecordV0,
contrecord->xl_len);
break;
}
if (!RecordIsValidV0(record))
goto next_record_is_invalid;
return record;
}
/* Record does not cross a page boundary */
if (!RecordIsValidV0(record))
goto next_record_is_invalid;
memcpy(buffer, record, total_len);
return (XLogRecordV0 *) buffer;
next_record_is_invalid:;
close(readFile);
readFile = -1;
return NULL;
}
/*
* Check whether the xlog header of a page just read in looks valid.
*
* This is just a convenience subroutine to avoid duplicated code in
* ReadRecord. It's not intended for use from anywhere else.
*/
static bool
ValidXLOGHeaderV0(XLogPageHeaderV0 hdr)
{
if (hdr->xlp_magic != XLOG_PAGE_MAGIC_V0)
return false;
if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0)
return false;
return true;
}
/******************* end of routines for old XLOG format *******************/
/*
* Guess at pg_control values when we can't read the old ones.
*/
static void
GuessControlValues(void)
{
#ifdef USE_LOCALE
char *localeptr;
#endif
/*
* Set up a completely default set of pg_control values.
*/
guessed = true;
memset(&ControlFile, 0, sizeof(ControlFile));
ControlFile.pg_control_version = PG_CONTROL_VERSION;
ControlFile.catalog_version_no = CATALOG_VERSION_NO;
ControlFile.checkPointCopy.redo.xlogid = 0;
ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogPHD;
ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
ControlFile.checkPointCopy.ThisStartUpID = 0;
ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData;
ControlFile.checkPointCopy.time = time(NULL);
ControlFile.state = DB_SHUTDOWNED;
ControlFile.time = time(NULL);
ControlFile.logId = 0;
ControlFile.logSeg = 1;
ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
ControlFile.blcksz = BLCKSZ;
ControlFile.relseg_size = RELSEG_SIZE;
#ifdef USE_LOCALE
localeptr = setlocale(LC_COLLATE, "");
if (!localeptr)
{
fprintf(stderr, "Invalid LC_COLLATE setting\n");
exit(1);
}
StrNCpy(ControlFile.lc_collate, localeptr, LOCALE_NAME_BUFLEN);
localeptr = setlocale(LC_CTYPE, "");
if (!localeptr)
{
fprintf(stderr, "Invalid LC_CTYPE setting\n");
exit(1);
}
StrNCpy(ControlFile.lc_ctype, localeptr, LOCALE_NAME_BUFLEN);
#else
strcpy(ControlFile.lc_collate, "C");
strcpy(ControlFile.lc_ctype, "C");
#endif
/*
* XXX eventually, should try to grovel through old XLOG to develop
* more accurate values for startupid, nextXID, and nextOID.
*/
}
/*
* Print the guessed pg_control values when we had to guess.
*
* NB: this display should be just those fields that will not be
* reset by RewriteControlFile().
*/
static void
PrintControlValues(void)
{
printf("Guessed-at pg_control values:\n\n"
"pg_control version number: %u\n"
"Catalog version number: %u\n"
"Current log file id: %u\n"
"Next log file segment: %u\n"
"Latest checkpoint's StartUpID: %u\n"
"Latest checkpoint's NextXID: %u\n"
"Latest checkpoint's NextOID: %u\n"
"Database block size: %u\n"
"Blocks per segment of large relation: %u\n"
"LC_COLLATE: %s\n"
"LC_CTYPE: %s\n",
ControlFile.pg_control_version,
ControlFile.catalog_version_no,
ControlFile.logId,
ControlFile.logSeg,
ControlFile.checkPointCopy.ThisStartUpID,
ControlFile.checkPointCopy.nextXid,
ControlFile.checkPointCopy.nextOid,
ControlFile.blcksz,
ControlFile.relseg_size,
ControlFile.lc_collate,
ControlFile.lc_ctype);
}
/*
* Write out the new pg_control file.
*/
static void
RewriteControlFile(void)
{
int fd;
char buffer[BLCKSZ]; /* need not be aligned */
/*
* Adjust fields as needed to force an empty XLOG starting at the next
* available segment.
*/
newXlogId = ControlFile.logId;
newXlogSeg = ControlFile.logSeg;
/* be sure we wrap around correctly at end of a logfile */
NextLogSeg(newXlogId, newXlogSeg);
ControlFile.checkPointCopy.redo.xlogid = newXlogId;
ControlFile.checkPointCopy.redo.xrecoff =
newXlogSeg * XLogSegSize + SizeOfXLogPHD;
ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
ControlFile.checkPointCopy.time = time(NULL);
ControlFile.state = DB_SHUTDOWNED;
ControlFile.time = time(NULL);
ControlFile.logId = newXlogId;
ControlFile.logSeg = newXlogSeg + 1;
ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
ControlFile.prevCheckPoint.xlogid = 0;
ControlFile.prevCheckPoint.xrecoff = 0;
/* Contents are protected with a CRC */
INIT_CRC64(ControlFile.crc);
COMP_CRC64(ControlFile.crc,
(char *) &ControlFile + sizeof(crc64),
sizeof(ControlFileData) - sizeof(crc64));
FIN_CRC64(ControlFile.crc);
/*
* We write out BLCKSZ bytes into pg_control, zero-padding the excess
* over sizeof(ControlFileData). This reduces the odds of
* premature-EOF errors when reading pg_control. We'll still fail
* when we check the contents of the file, but hopefully with a more
* specific error than "couldn't read pg_control".
*/
if (sizeof(ControlFileData) > BLCKSZ)
{
fprintf(stderr, "sizeof(ControlFileData) is too large ... fix xlog.c\n");
exit(1);
}
memset(buffer, 0, BLCKSZ);
memcpy(buffer, &ControlFile, sizeof(ControlFileData));
unlink(ControlFilePath);
fd = open(ControlFilePath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR);
if (fd < 0)
{
perror("RewriteControlFile failed to create pg_control file");
exit(1);
}
errno = 0;
if (write(fd, buffer, BLCKSZ) != BLCKSZ)
{
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
perror("RewriteControlFile failed to write pg_control file");
exit(1);
}
if (fsync(fd) != 0)
{
perror("fsync");
exit(1);
}
close(fd);
}
/*
* Remove existing XLOG files
*/
static void
KillExistingXLOG(void)
{
DIR *xldir;
struct dirent *xlde;
char path[MAXPGPATH];
xldir = opendir(XLogDir);
if (xldir == NULL)
{
perror("KillExistingXLOG: cannot open $PGDATA/pg_xlog directory");
exit(1);
}
errno = 0;
while ((xlde = readdir(xldir)) != NULL)
{
if (strlen(xlde->d_name) == 16 &&
strspn(xlde->d_name, "0123456789ABCDEF") == 16)
{
sprintf(path, "%s/%s", XLogDir, xlde->d_name);
if (unlink(path) < 0)
{
perror(path);
exit(1);
}
}
errno = 0;
}
if (errno)
{
perror("KillExistingXLOG: cannot read $PGDATA/pg_xlog directory");
exit(1);
}
closedir(xldir);
}
/*
* Write an empty XLOG file, containing only the checkpoint record
* already set up in ControlFile.
*/
static void
WriteEmptyXLOG(void)
{
char *buffer;
XLogPageHeader page;
XLogRecord *record;
crc64 crc;
char path[MAXPGPATH];
int fd;
int nbytes;
/* Use malloc() to ensure buffer is MAXALIGNED */
buffer = (char *) malloc(BLCKSZ);
page = (XLogPageHeader) buffer;
/* Set up the first page with initial record */
memset(buffer, 0, BLCKSZ);
page->xlp_magic = XLOG_PAGE_MAGIC;
page->xlp_info = 0;
page->xlp_sui = ControlFile.checkPointCopy.ThisStartUpID;
record = (XLogRecord *) ((char *) page + SizeOfXLogPHD);
record->xl_prev.xlogid = 0;
record->xl_prev.xrecoff = 0;
record->xl_xact_prev = record->xl_prev;
record->xl_xid = InvalidTransactionId;
record->xl_len = sizeof(CheckPoint);
record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
record->xl_rmid = RM_XLOG_ID;
memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
sizeof(CheckPoint));
INIT_CRC64(crc);
COMP_CRC64(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
COMP_CRC64(crc, (char *) record + sizeof(crc64),
SizeOfXLogRecord - sizeof(crc64));
FIN_CRC64(crc);
record->xl_crc = crc;
/* Write the first page */
XLogFileName(path, newXlogId, newXlogSeg);
unlink(path);
fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
S_IRUSR | S_IWUSR);
if (fd < 0)
{
perror(path);
exit(1);
}
errno = 0;
if (write(fd, buffer, BLCKSZ) != BLCKSZ)
{
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
perror("WriteEmptyXLOG: failed to write xlog file");
exit(1);
}
/* Fill the rest of the file with zeroes */
memset(buffer, 0, BLCKSZ);
for (nbytes = BLCKSZ; nbytes < XLogSegSize; nbytes += BLCKSZ)
{
errno = 0;
if (write(fd, buffer, BLCKSZ) != BLCKSZ)
{
if (errno == 0)
errno = ENOSPC;
perror("WriteEmptyXLOG: failed to write xlog file");
exit(1);
}
}
if (fsync(fd) != 0)
{
perror("fsync");
exit(1);
}
close(fd);
}
static void
usage(void)
{
fprintf(stderr, "Usage: pg_resetxlog [-f] [-n] PGDataDirectory\n\n"
" -f\tforce update to be done\n"
" -n\tno update, just show extracted pg_control values (for testing)\n");
exit(1);
}
int
main(int argc, char **argv)
{
int argn;
bool force = false;
bool noupdate = false;
int fd;
char path[MAXPGPATH];
for (argn = 1; argn < argc; argn++)
{
if (argv[argn][0] != '-')
break; /* end of switches */
if (strcmp(argv[argn], "-f") == 0)
force = true;
else if (strcmp(argv[argn], "-n") == 0)
noupdate = true;
else
usage();
}
if (argn != argc - 1) /* one required non-switch argument */
usage();
DataDir = argv[argn++];
snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir);
snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
/*
* Check for a postmaster lock file --- if there is one, refuse to
* proceed, on grounds we might be interfering with a live
* installation.
*/
snprintf(path, MAXPGPATH, "%s/postmaster.pid", DataDir);
if ((fd = open(path, O_RDONLY)) < 0)
{
if (errno != ENOENT)
{
perror("Failed to open $PGDATA/postmaster.pid for reading");
exit(1);
}
}
else
{
fprintf(stderr, "Lock file '%s' exists --- is a postmaster running?\n"
"If not, delete the lock file and try again.\n",
path);
exit(1);
}
/*
* Attempt to read the existing pg_control file
*/
if (!ReadControlFile())
GuessControlValues();
/*
* If we had to guess anything, and -f was not given, just print the
* guessed values and exit. Also print if -n is given.
*/
if ((guessed && !force) || noupdate)
{
PrintControlValues();
if (!noupdate)
printf("\nIf these values seem acceptable, use -f to force reset.\n");
exit(1);
}
/*
* Don't reset from a dirty pg_control without -f, either.
*/
if (ControlFile.state != DB_SHUTDOWNED && !force)
{
printf("The database was not shut down cleanly.\n"
"Resetting the xlog may cause data to be lost!\n"
"If you want to proceed anyway, use -f to force reset.\n");
exit(1);
}
/*
* Else, do the dirty deed.
*/
RewriteControlFile();
KillExistingXLOG();
WriteEmptyXLOG();
printf("XLOG reset.\n");
return 0;
}