/*-------------------------------------------------------------------------
 *
 * pg_resetxlog.c
 *	  A utility to "zero out" the xlog when it's corrupt beyond recovery.
 *	  Can also rebuild pg_control if needed.
 *
 * The theory of operation is fairly simple:
 *	  1. Read the existing pg_control (which will include the last
 *		 checkpoint record).  If it is an old format then update to
 *		 current format.
 *	  2. If pg_control is corrupt, attempt to intuit reasonable values,
 *		 by scanning the old xlog if necessary.
 *	  3. Modify pg_control to reflect a "shutdown" state with a checkpoint
 *		 record at the start of xlog.
 *	  4. Flush the existing xlog files and write a new segment with
 *		 just a checkpoint record in it.  The new segment is positioned
 *		 just past the end of the old xlog, so that existing LSNs in
 *		 data pages will appear to be "in the past".
 * This is all pretty straightforward except for the intuition part of
 * step 2 ...
 *
 *
 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * $Header: /cvsroot/pgsql/contrib/pg_resetxlog/Attic/pg_resetxlog.c,v 1.10 2001/11/05 17:46:23 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <errno.h>
#include <unistd.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <dirent.h>
#ifdef USE_LOCALE
#include <locale.h>
#endif

#include "access/xlog.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"


/******************** stuff copied from xlog.c ********************/

/*
 * Increment an xlogid/segment pair in place.
 *
 * When logSeg is already at (or beyond) the last segment number,
 * XLogSegsPerFile-1, logId is bumped and logSeg restarts at 0; otherwise
 * only logSeg is incremented.  Both arguments must be modifiable lvalues,
 * and each may be evaluated more than once.
 */
#define NextLogSeg(logId, logSeg)	\
	do { \
		if ((logSeg) >= XLogSegsPerFile-1) \
		{ \
			(logId)++; \
			(logSeg) = 0; \
		} \
		else \
			(logSeg)++; \
	} while (0)

/*
 * Compute ID and segment from an XLogRecPtr.
 *
 * Both macros store their results into the logId and logSeg arguments,
 * which therefore must be assignable lvalues.
 *
 * For XLByteToSeg, do the computation at face value.  For XLByteToPrevSeg,
 * a boundary byte is taken to be in the previous segment (the offset is
 * reduced by one before dividing by XLogSegSize).  This is suitable for
 * deciding which segment to write given a pointer to a record end, for
 * example.
 */
#define XLByteToSeg(xlrp, logId, logSeg)	\
	( logId = (xlrp).xlogid, \
	  logSeg = (xlrp).xrecoff / XLogSegSize \
	)
#define XLByteToPrevSeg(xlrp, logId, logSeg)	\
	( logId = (xlrp).xlogid, \
	  logSeg = ((xlrp).xrecoff - 1) / XLogSegSize \
	)

/*
 * Is an XLogRecPtr within a particular XLOG segment?
 *
 * Each macro yields nonzero when the pointer's xlogid equals logId and
 * its offset, divided by XLogSegSize, equals logSeg.
 *
 * For XLByteInSeg, do the computation at face value.  For XLByteInPrevSeg,
 * a boundary byte is taken to be in the previous segment (the offset is
 * reduced by one before the division).
 */
#define XLByteInSeg(xlrp, logId, logSeg)	\
	((xlrp).xlogid == (logId) && \
	 (xlrp).xrecoff / XLogSegSize == (logSeg))

#define XLByteInPrevSeg(xlrp, logId, logSeg)	\
	((xlrp).xlogid == (logId) && \
	 ((xlrp).xrecoff - 1) / XLogSegSize == (logSeg))


/*
 * Build the path name of an XLOG segment file into "path".
 *
 * The result is XLogDir, a slash, and then log and seg each printed as
 * eight upper-case hexadecimal digits.  Output is limited to MAXPGPATH
 * bytes, so "path" must be a buffer of at least that size.
 */
#define XLogFileName(path, log, seg)	\
			snprintf(path, MAXPGPATH, "%s/%08X%08X",	\
					 XLogDir, log, seg)

/*
 * _INTL_MAXLOGRECSZ: max space needed for a record including header and
 * any backup-block data.
 *
 * That is the record header (SizeOfXLogRecord), MAXLOGRECSZ bytes of
 * record data, and XLR_MAX_BKP_BLOCKS backup blocks, each counted as a
 * BkpBlock struct plus BLCKSZ bytes.  In this file it is used as the
 * size of the workspace handed to ReadRecordV0().
 */
#define _INTL_MAXLOGRECSZ	(SizeOfXLogRecord + MAXLOGRECSZ + \
							 XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))

/******************** end of stuff copied from xlog.c ********************/


/*
 * Locations of important stuff.  main() points DataDir at the data
 * directory named on the command line and derives the other two paths
 * from it ("<DataDir>/pg_xlog" and "<DataDir>/global/pg_control").
 */
static char *DataDir;			/* data directory path (from argv) */
static char XLogDir[MAXPGPATH];
static char ControlFilePath[MAXPGPATH];

static ControlFileData ControlFile;		/* pg_control values */
/* Set by RewriteControlFile(), consumed by WriteEmptyXLOG() */
static uint32 newXlogId,
			newXlogSeg;			/* ID/Segment of new XLOG segment */
static bool guessed = false;	/* T if we had to guess at any values */


/* Forward declaration: ReadControlFile() calls this before it is defined */
static bool CheckControlVersion0(char *buffer, int len);


/*
 * Open the existing XLOG segment file identified by (log, seg).
 *
 * The path is built with XLogFileName().  The descriptor returned by
 * open() is handed back unchanged, so a negative result means the file
 * could not be opened; nothing is reported here.
 */
static int
XLogFileOpen(uint32 log, uint32 seg)
{
	char		segpath[MAXPGPATH];

	XLogFileName(segpath, log, seg);

	return open(segpath, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
}


/*
 * Try to read the existing pg_control file.
 *
 * This routine is also responsible for updating old pg_control versions
 * to the current format.
 */
static bool
ReadControlFile(void)
{
	int			fd;
	int			len;
	char	   *buffer;
	crc64		crc;

	if ((fd = open(ControlFilePath, O_RDONLY)) < 0)
	{
		/*
		 * If pg_control is not there at all, or we can't read it, the
		 * odds are we've been handed a bad DataDir path, so give up. User
		 * can do "touch pg_control" to force us to proceed.
		 */
		perror("Failed to open $PGDATA/global/pg_control for reading");
		if (errno == ENOENT)
			fprintf(stderr, "If you're sure the PGDATA path is correct, do\n"
					"  touch %s\n"
					"and try again.\n", ControlFilePath);
		exit(1);
	}

	/* Use malloc to ensure we have a maxaligned buffer */
	buffer = (char *) malloc(BLCKSZ);

	len = read(fd, buffer, BLCKSZ);
	if (len < 0)
	{
		perror("Failed to read $PGDATA/global/pg_control");
		exit(1);
	}
	close(fd);

	if (len >= sizeof(ControlFileData) &&
		((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
	{
		/* Seems to be current version --- check the CRC. */
		INIT_CRC64(crc);
		COMP_CRC64(crc,
				   buffer + sizeof(crc64),
				   sizeof(ControlFileData) - sizeof(crc64));
		FIN_CRC64(crc);

		if (EQ_CRC64(crc, ((ControlFileData *) buffer)->crc))
		{
			/* Valid data... */
			memcpy(&ControlFile, buffer, sizeof(ControlFile));
			return true;
		}

		fprintf(stderr, "pg_control exists but has invalid CRC; proceed with caution.\n");
		/* We will use the data anyway, but treat it as guessed. */
		memcpy(&ControlFile, buffer, sizeof(ControlFile));
		guessed = true;
		return true;
	}

	/*
	 * Maybe it's a 7.1beta pg_control.
	 */
	if (CheckControlVersion0(buffer, len))
		return true;

	/* Looks like it's a mess. */
	fprintf(stderr, "pg_control exists but is broken or unknown version; ignoring it.\n");
	return false;
}


/******************* routines for old XLOG format *******************/


/*
 * This format was in use in 7.1 beta releases through 7.1beta5.  The
 * pg_control layout was different, and so were the XLOG page headers.
 * The XLOG record header format was physically the same as 7.1 release,
 * but interpretation of the xl_len field was not.
 */

/*
 * The "64-bit CRC" of the old format, held as two 32-bit halves.  See
 * COMP_CRC64V0 for how each half is accumulated.
 */
typedef struct crc64V0
{
	uint32		crc1;
	uint32		crc2;
}	crc64V0;

/*
 * 256-entry lookup table used by COMP_CRC64V0; it is indexed with a value
 * masked to the range 0..255.
 *
 * NOTE(review): the contents look like the usual reflected CRC-32 table
 * (polynomial 0xedb88320) --- confirm before relying on that.
 */
static uint32 crc_tableV0[] = {
	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
	0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
	0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
	0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
	0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};

/*
 * V0 CRC computation.  Usage is INIT_CRC64V0, then one or more
 * COMP_CRC64V0 calls over the data, then FIN_CRC64V0; compare two
 * finished values with EQ_CRC64V0.
 *
 * INIT sets both halves to all-ones and FIN inverts both halves.
 */
#define INIT_CRC64V0(crc)	((crc).crc1 = 0xffffffff, (crc).crc2 = 0xffffffff)
#define FIN_CRC64V0(crc)	((crc).crc1 ^= 0xffffffff, (crc).crc2 ^= 0xffffffff)
/*
 * Accumulate len bytes at data into crc.  Bytes are consumed in pairs:
 * the first byte of each pair updates crc1 and the second updates crc2,
 * each through a crc_tableV0 lookup.  A leftover odd byte goes to crc1.
 * Because the pairing restarts on every invocation, splitting the same
 * data differently across calls can give a different result.
 */
#define COMP_CRC64V0(crc, data, len)	\
do {\
		uint32		 __c1 = (crc).crc1;\
		uint32		 __c2 = (crc).crc2;\
		char		*__data = (char *) (data);\
		uint32		 __len = (len);\
\
		while (__len >= 2)\
		{\
				__c1 = crc_tableV0[(__c1 ^ *__data++) & 0xff] ^ (__c1 >> 8);\
				__c2 = crc_tableV0[(__c2 ^ *__data++) & 0xff] ^ (__c2 >> 8);\
				__len -= 2;\
		}\
		if (__len > 0)\
				__c1 = crc_tableV0[(__c1 ^ *__data++) & 0xff] ^ (__c1 >> 8);\
		(crc).crc1 = __c1;\
		(crc).crc2 = __c2;\
} while (0)

/* True when both halves of two V0 CRC values match */
#define EQ_CRC64V0(c1,c2)  ((c1).crc1 == (c2).crc1 && (c1).crc2 == (c2).crc2)


/* Size of the locale-name fields in the version-0 pg_control layout */
#define LOCALE_NAME_BUFLEN_V0  128

/*
 * pg_control contents in the "version 0" (7.1 beta) layout.
 *
 * CheckControlVersion0() overlays this struct on the bytes read from
 * pg_control, so the field order and sizes must not be changed.  The CRC
 * covers everything after the crc field itself.
 */
typedef struct ControlFileDataV0
{
	crc64V0		crc;
	uint32		logId;			/* current log file id */
	uint32		logSeg;			/* current log file segment (1-based) */
	XLogRecPtr	checkPoint;		/* last check point record ptr */
	time_t		time;			/* time stamp of last modification */
	DBState		state;			/* see enum above */
	uint32		blcksz;			/* block size for this DB */
	uint32		relseg_size;	/* blocks per segment of large relation */
	uint32		catalog_version_no;		/* internal version number */
	char		lc_collate[LOCALE_NAME_BUFLEN_V0];
	char		lc_ctype[LOCALE_NAME_BUFLEN_V0];
	char		archdir[MAXPGPATH];		/* where to move offline log files */
}	ControlFileDataV0;

/*
 * Body of a checkpoint record in the old XLOG format.
 *
 * CheckControlVersion0() reads ThisStartUpID, nextXid and nextOid from
 * here; the other fields are not used in this file.
 */
typedef struct CheckPointV0
{
	/*
	 * next RecPtr available when we began to create CheckPoint (i.e. REDO
	 * start point)
	 */
	XLogRecPtr	redo;
	/*
	 * first record of oldest in-progress transaction when we started (i.e.
	 * UNDO end point)
	 */
	XLogRecPtr	undo;
	StartUpID	ThisStartUpID;
	TransactionId nextXid;
	Oid			nextOid;
	bool		Shutdown;
}	CheckPointV0;

/*
 * Header of an XLOG record in the old format.  ReadRecordV0() overlays
 * it on page data read from an XLOG segment file.
 */
typedef struct XLogRecordV0
{
	crc64V0		xl_crc;
	XLogRecPtr	xl_prev;		/* ptr to previous record in log */
	XLogRecPtr	xl_xact_prev;	/* ptr to previous record of this xact */
	TransactionId xl_xid;		/* xact id */
	uint16		xl_len;			/* total len of record *data* */
	uint8		xl_info;
	RmgrId		xl_rmid;		/* resource manager inserted this record */
}	XLogRecordV0;

/* Header size rounded up with DOUBLEALIGN */
#define SizeOfXLogRecordV0	DOUBLEALIGN(sizeof(XLogRecordV0))

/*
 * Header of the continued part of a record.  ReadRecordV0() expects it
 * right after the page header on a page flagged XLP_FIRST_IS_CONTRECORD.
 */
typedef struct XLogContRecordV0
{
	uint16		xl_len;			/* len of data left */
}	XLogContRecordV0;

/* Continuation header size rounded up with DOUBLEALIGN */
#define SizeOfXLogContRecordV0	DOUBLEALIGN(sizeof(XLogContRecordV0))

/* Value ValidXLOGHeaderV0() requires in xlp_magic of an old-format page */
#define XLOG_PAGE_MAGIC_V0 0x17345168

/* Header found at the start of each XLOG page in the old format */
typedef struct XLogPageHeaderDataV0
{
	uint32		xlp_magic;		/* must equal XLOG_PAGE_MAGIC_V0 */
	uint16		xlp_info;		/* flag bits; checked against XLP_ALL_FLAGS */
}	XLogPageHeaderDataV0;

/* Page header size rounded up with DOUBLEALIGN */
#define SizeOfXLogPHDV0   DOUBLEALIGN(sizeof(XLogPageHeaderDataV0))

typedef XLogPageHeaderDataV0 *XLogPageHeaderV0;


/* Forward declarations for the old-format XLOG reading routines */
static bool RecordIsValidV0(XLogRecordV0 * record);
static XLogRecordV0 *ReadRecordV0(XLogRecPtr *RecPtr, char *buffer);
static bool ValidXLOGHeaderV0(XLogPageHeaderV0 hdr);


/*
 * Try to interpret pg_control contents as "version 0" format.
 */
static bool
CheckControlVersion0(char *buffer, int len)
{
	crc64V0		crc;
	ControlFileDataV0 *oldfile;
	XLogRecordV0 *record;
	CheckPointV0 *oldchkpt;

	if (len < sizeof(ControlFileDataV0))
		return false;
	/* Check CRC the version-0 way. */
	INIT_CRC64V0(crc);
	COMP_CRC64V0(crc,
				 buffer + sizeof(crc64V0),
				 sizeof(ControlFileDataV0) - sizeof(crc64V0));
	FIN_CRC64V0(crc);

	if (!EQ_CRC64V0(crc, ((ControlFileDataV0 *) buffer)->crc))
		return false;

	/* Valid data, convert useful fields to new-style pg_control format */
	oldfile = (ControlFileDataV0 *) buffer;

	memset(&ControlFile, 0, sizeof(ControlFile));

	ControlFile.pg_control_version = PG_CONTROL_VERSION;
	ControlFile.catalog_version_no = oldfile->catalog_version_no;

	ControlFile.state = oldfile->state;
	ControlFile.logId = oldfile->logId;
	ControlFile.logSeg = oldfile->logSeg;

	ControlFile.blcksz = oldfile->blcksz;
	ControlFile.relseg_size = oldfile->relseg_size;
	strcpy(ControlFile.lc_collate, oldfile->lc_collate);
	strcpy(ControlFile.lc_ctype, oldfile->lc_ctype);

	/*
	 * Since this format did not include a copy of the latest checkpoint
	 * record, we have to go rooting in the old XLOG to get that.
	 */
	record = ReadRecordV0(&oldfile->checkPoint,
						  (char *) malloc(_INTL_MAXLOGRECSZ));
	if (record == NULL)
	{
		/*
		 * We have to guess at the checkpoint contents.
		 */
		guessed = true;
		ControlFile.checkPointCopy.ThisStartUpID = 0;
		ControlFile.checkPointCopy.nextXid = (TransactionId) 514;		/* XXX */
		ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData;
		return true;
	}
	oldchkpt = (CheckPointV0 *) XLogRecGetData(record);

	ControlFile.checkPointCopy.ThisStartUpID = oldchkpt->ThisStartUpID;
	ControlFile.checkPointCopy.nextXid = oldchkpt->nextXid;
	ControlFile.checkPointCopy.nextOid = oldchkpt->nextOid;

	return true;
}

/*
 * CRC-check an XLOG V0 record.  We do not believe the contents of an XLOG
 * record (other than to the minimal extent of computing the amount of
 * data to read in) until we've checked the CRCs.
 *
 * The whole record must already be in memory at *record.  The CRC is
 * accumulated over the record data first and then over the header
 * (skipping the stored CRC itself).  Returns true if the result equals
 * the CRC stored in the record.
 */
static bool
RecordIsValidV0(XLogRecordV0 * record)
{
	crc64V0		computed;
	uint32		datalen = record->xl_len;

	/*
	 * NB: this code is not right for V0 records containing backup blocks,
	 * but for now it's only going to be applied to checkpoint records, so
	 * I'm not going to worry about it...
	 */
	INIT_CRC64V0(computed);
	COMP_CRC64V0(computed, XLogRecGetData(record), datalen);
	COMP_CRC64V0(computed, (char *) record + sizeof(crc64V0),
				 SizeOfXLogRecordV0 - sizeof(crc64V0));
	FIN_CRC64V0(computed);

	if (EQ_CRC64V0(record->xl_crc, computed))
		return true;

	return false;
}

/*
 * Attempt to read an XLOG V0 record at recptr.
 *
 * If no valid record is available, returns NULL.
 *
 * buffer is a workspace at least _INTL_MAXLOGRECSZ bytes long.  It is needed
 * to reassemble a record that crosses block boundaries.  Note that on
 * successful return, the returned record pointer always points at buffer.
 *
 * The open segment file, its id/segment, the offset of the page held in
 * the page buffer, and the page buffer itself are kept in static
 * variables across calls.  Any failure closes the file, which forces the
 * next call to reopen it and reread the page.
 *
 * NOTE(review): nothing here verifies that the record header lies
 * entirely within the page that was read before xl_len is examined.
 */
static XLogRecordV0 *
ReadRecordV0(XLogRecPtr *RecPtr, char *buffer)
{
	static int	readFile = -1;
	static uint32 readId = 0;
	static uint32 readSeg = 0;
	static uint32 readOff = 0;
	static char *readBuf = NULL;

	XLogRecordV0 *record;
	uint32		len,
				total_len;
	uint32		targetPageOff;

	if (readBuf == NULL)
	{
		readBuf = (char *) malloc(BLCKSZ);
		if (readBuf == NULL)
		{
			fprintf(stderr, "out of memory\n");
			exit(1);
		}
	}

	/*
	 * A file left open by an earlier call is only usable if the requested
	 * record is in that same segment; otherwise drop it so that the right
	 * segment gets opened below.
	 */
	if (readFile >= 0 && !XLByteInSeg(*RecPtr, readId, readSeg))
	{
		close(readFile);
		readFile = -1;
	}

	XLByteToSeg(*RecPtr, readId, readSeg);
	if (readFile < 0)
	{
		readFile = XLogFileOpen(readId, readSeg);
		if (readFile < 0)
			goto next_record_is_invalid;
		readOff = (uint32) (-1);	/* force read to occur below */
	}

	/* Offset, within the segment, of the page containing the record start */
	targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / BLCKSZ) * BLCKSZ;
	if (readOff != targetPageOff)
	{
		readOff = targetPageOff;
		if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
			goto next_record_is_invalid;
		if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
			goto next_record_is_invalid;
		if (!ValidXLOGHeaderV0((XLogPageHeaderV0) readBuf))
			goto next_record_is_invalid;
	}
	/* A record cannot start where the page says a continuation begins */
	if ((((XLogPageHeaderV0) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
		RecPtr->xrecoff % BLCKSZ == SizeOfXLogPHDV0)
		goto next_record_is_invalid;
	record = (XLogRecordV0 *) ((char *) readBuf + RecPtr->xrecoff % BLCKSZ);

	if (record->xl_len == 0)
		goto next_record_is_invalid;

	/*
	 * Compute total length of record including any appended backup
	 * blocks.
	 */
	total_len = SizeOfXLogRecordV0 + record->xl_len;

	/*
	 * Make sure it will fit in buffer (currently, it is mechanically
	 * impossible for this test to fail, but it seems like a good idea
	 * anyway).
	 */
	if (total_len > _INTL_MAXLOGRECSZ)
		goto next_record_is_invalid;
	/* Bytes available from the record start to the end of this page */
	len = BLCKSZ - RecPtr->xrecoff % BLCKSZ;
	if (total_len > len)
	{
		/* Need to reassemble record */
		XLogContRecordV0 *contrecord;
		uint32		gotlen = len;

		memcpy(buffer, record, len);
		record = (XLogRecordV0 *) buffer;
		buffer += len;
		for (;;)
		{
			/* Step to the next page, moving to the next segment if needed */
			readOff += BLCKSZ;
			if (readOff >= XLogSegSize)
			{
				close(readFile);
				readFile = -1;
				NextLogSeg(readId, readSeg);
				readFile = XLogFileOpen(readId, readSeg);
				if (readFile < 0)
					goto next_record_is_invalid;
				readOff = 0;
			}
			/* No seek: the file position is already at this page */
			if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
				goto next_record_is_invalid;
			if (!ValidXLOGHeaderV0((XLogPageHeaderV0) readBuf))
				goto next_record_is_invalid;
			if (!(((XLogPageHeaderV0) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD))
				goto next_record_is_invalid;
			contrecord = (XLogContRecordV0 *) ((char *) readBuf + SizeOfXLogPHDV0);
			/* The remaining length must agree with what we've collected */
			if (contrecord->xl_len == 0 ||
				total_len != (contrecord->xl_len + gotlen))
				goto next_record_is_invalid;
			len = BLCKSZ - SizeOfXLogPHDV0 - SizeOfXLogContRecordV0;
			if (contrecord->xl_len > len)
			{
				/* Rest of record fills this page and continues further */
				memcpy(buffer, (char *) contrecord + SizeOfXLogContRecordV0, len);
				gotlen += len;
				buffer += len;
				continue;
			}
			memcpy(buffer, (char *) contrecord + SizeOfXLogContRecordV0,
				   contrecord->xl_len);
			break;
		}
		if (!RecordIsValidV0(record))
			goto next_record_is_invalid;
		return record;
	}

	/* Record does not cross a page boundary */
	if (!RecordIsValidV0(record))
		goto next_record_is_invalid;
	memcpy(buffer, record, total_len);
	return (XLogRecordV0 *) buffer;

next_record_is_invalid:;
	/* readFile is still -1 if the segment could not be opened at all */
	if (readFile >= 0)
		close(readFile);
	readFile = -1;
	return NULL;
}

/*
 * Check whether the xlog header of a page just read in looks valid.
 *
 * A header is accepted when its magic number is XLOG_PAGE_MAGIC_V0 and
 * xlp_info has no bits set outside XLP_ALL_FLAGS.
 *
 * This is just a convenience subroutine to avoid duplicated code in
 * ReadRecordV0.  It's not intended for use from anywhere else.
 */
static bool
ValidXLOGHeaderV0(XLogPageHeaderV0 hdr)
{
	bool		magic_ok = (hdr->xlp_magic == XLOG_PAGE_MAGIC_V0);
	bool		flags_ok = ((hdr->xlp_info & ~XLP_ALL_FLAGS) == 0);

	return magic_ok && flags_ok;
}

/******************* end of routines for old XLOG format *******************/


/*
 * Guess at pg_control values when we can't read the old ones.
 *
 * ControlFile is wiped and refilled with compiled-in defaults, and
 * "guessed" is set.  With USE_LOCALE the collation and ctype names come
 * from the current environment (the program exits if either setting is
 * invalid); without it both are "C".
 */
static void
GuessControlValues(void)
{
#ifdef USE_LOCALE
	char	   *locname;
#endif

	/* Whatever we come up with here is, by definition, a guess. */
	guessed = true;

	/*
	 * Start from an all-zeroes struct and fill in a completely default
	 * set of pg_control values.
	 */
	memset(&ControlFile, 0, sizeof(ControlFile));

	/* Version stamps and physical layout constants of this build */
	ControlFile.pg_control_version = PG_CONTROL_VERSION;
	ControlFile.catalog_version_no = CATALOG_VERSION_NO;
	ControlFile.blcksz = BLCKSZ;
	ControlFile.relseg_size = RELSEG_SIZE;

	/* Checkpoint placed right after the first page header of log 0 */
	ControlFile.checkPointCopy.redo.xlogid = 0;
	ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogPHD;
	ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
	ControlFile.checkPointCopy.ThisStartUpID = 0;
	ControlFile.checkPointCopy.nextXid = (TransactionId) 514;	/* XXX */
	ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData;
	ControlFile.checkPointCopy.time = time(NULL);

	/* Overall state */
	ControlFile.state = DB_SHUTDOWNED;
	ControlFile.time = time(NULL);
	ControlFile.logId = 0;
	ControlFile.logSeg = 1;
	ControlFile.checkPoint = ControlFile.checkPointCopy.redo;

#ifdef USE_LOCALE
	/* Each name is copied before setlocale() is called again */
	locname = setlocale(LC_COLLATE, "");
	if (locname == NULL)
	{
		fprintf(stderr, "Invalid LC_COLLATE setting\n");
		exit(1);
	}
	StrNCpy(ControlFile.lc_collate, locname, LOCALE_NAME_BUFLEN);

	locname = setlocale(LC_CTYPE, "");
	if (locname == NULL)
	{
		fprintf(stderr, "Invalid LC_CTYPE setting\n");
		exit(1);
	}
	StrNCpy(ControlFile.lc_ctype, locname, LOCALE_NAME_BUFLEN);
#else
	strcpy(ControlFile.lc_collate, "C");
	strcpy(ControlFile.lc_ctype, "C");
#endif

	/*
	 * XXX eventually, should try to grovel through old XLOG to develop
	 * more accurate values for startupid, nextXID, and nextOID.
	 */
}


/*
 * Print the guessed pg_control values when we had to guess.
 *
 * The report goes to stdout, one field per line.
 *
 * NB: this display should be just those fields that will not be
 * reset by RewriteControlFile().
 */
static void
PrintControlValues(void)
{
	printf("Guessed-at pg_control values:\n\n");

	/* Version stamps */
	printf("pg_control version number:            %u\n",
		   ControlFile.pg_control_version);
	printf("Catalog version number:               %u\n",
		   ControlFile.catalog_version_no);

	/* XLOG position */
	printf("Current log file id:                  %u\n",
		   ControlFile.logId);
	printf("Next log file segment:                %u\n",
		   ControlFile.logSeg);

	/* Counters taken from the latest checkpoint */
	printf("Latest checkpoint's StartUpID:        %u\n",
		   ControlFile.checkPointCopy.ThisStartUpID);
	printf("Latest checkpoint's NextXID:          %u\n",
		   ControlFile.checkPointCopy.nextXid);
	printf("Latest checkpoint's NextOID:          %u\n",
		   ControlFile.checkPointCopy.nextOid);

	/* Physical layout */
	printf("Database block size:                  %u\n",
		   ControlFile.blcksz);
	printf("Blocks per segment of large relation: %u\n",
		   ControlFile.relseg_size);

	/* Locale settings */
	printf("LC_COLLATE:                           %s\n",
		   ControlFile.lc_collate);
	printf("LC_CTYPE:                             %s\n",
		   ControlFile.lc_ctype);
}


/*
 * Write out the new pg_control file.
 *
 * ControlFile is first adjusted to describe a cleanly shut down system
 * whose only checkpoint sits at the start of the next XLOG segment; that
 * segment's id/number are left in newXlogId/newXlogSeg.  The old
 * pg_control is then unlinked and a new one is created, written and
 * fsync'd.  Any failure terminates the program.
 */
static void
RewriteControlFile(void)
{
	char		page[BLCKSZ];	/* need not be aligned */
	int			ctlfd;

	/*
	 * We write out BLCKSZ bytes into pg_control, zero-padding the excess
	 * over sizeof(ControlFileData).  This reduces the odds of
	 * premature-EOF errors when reading pg_control.  We'll still fail
	 * when we check the contents of the file, but hopefully with a more
	 * specific error than "couldn't read pg_control".  That only works if
	 * the struct fits in one block, so verify that before doing anything.
	 */
	if (sizeof(ControlFileData) > BLCKSZ)
	{
		fprintf(stderr, "sizeof(ControlFileData) is too large ... fix xlog.c\n");
		exit(1);
	}

	/*
	 * Adjust fields as needed to force an empty XLOG starting at the next
	 * available segment.  NextLogSeg takes care of wrapping around
	 * correctly at the end of a logfile.
	 */
	newXlogId = ControlFile.logId;
	newXlogSeg = ControlFile.logSeg;
	NextLogSeg(newXlogId, newXlogSeg);

	/* The checkpoint record goes right after the segment's first page header */
	ControlFile.checkPointCopy.redo.xlogid = newXlogId;
	ControlFile.checkPointCopy.redo.xrecoff =
		newXlogSeg * XLogSegSize + SizeOfXLogPHD;
	ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
	ControlFile.checkPointCopy.time = time(NULL);

	ControlFile.state = DB_SHUTDOWNED;
	ControlFile.time = time(NULL);
	ControlFile.logId = newXlogId;
	ControlFile.logSeg = newXlogSeg + 1;
	ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
	ControlFile.prevCheckPoint.xlogid = 0;
	ControlFile.prevCheckPoint.xrecoff = 0;

	/* Contents are protected with a CRC, computed once all fields are final */
	INIT_CRC64(ControlFile.crc);
	COMP_CRC64(ControlFile.crc,
			   (char *) &ControlFile + sizeof(crc64),
			   sizeof(ControlFileData) - sizeof(crc64));
	FIN_CRC64(ControlFile.crc);

	/* Build the zero-padded file image */
	memset(page, 0, BLCKSZ);
	memcpy(page, &ControlFile, sizeof(ControlFileData));

	/* Replace any existing file: remove it, then create a fresh one */
	unlink(ControlFilePath);

	ctlfd = open(ControlFilePath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR);
	if (ctlfd < 0)
	{
		perror("RewriteControlFile failed to create pg_control file");
		exit(1);
	}

	errno = 0;
	if (write(ctlfd, page, BLCKSZ) != BLCKSZ)
	{
		/* if write didn't set errno, assume problem is no disk space */
		if (errno == 0)
			errno = ENOSPC;
		perror("RewriteControlFile failed to write pg_control file");
		exit(1);
	}

	if (fsync(ctlfd) != 0)
	{
		perror("fsync");
		exit(1);
	}

	close(ctlfd);
}


/*
 * Remove existing XLOG files
 *
 * Every entry of XLogDir whose name is exactly 16 upper-case hexadecimal
 * digits is unlinked; other entries are left alone.  Any failure
 * (opening or reading the directory, building a path, unlinking a file)
 * terminates the program.
 */
static void
KillExistingXLOG(void)
{
	DIR		   *xldir;
	struct dirent *xlde;
	char		path[MAXPGPATH];

	xldir = opendir(XLogDir);
	if (xldir == NULL)
	{
		perror("KillExistingXLOG: cannot open $PGDATA/pg_xlog directory");
		exit(1);
	}

	/* readdir() returns NULL for both end-of-directory and error */
	errno = 0;
	while ((xlde = readdir(xldir)) != NULL)
	{
		if (strlen(xlde->d_name) == 16 &&
			strspn(xlde->d_name, "0123456789ABCDEF") == 16)
		{
			int			pathlen;

			/*
			 * Bounded formatting: XLogDir by itself may occupy nearly all
			 * of MAXPGPATH, so never write past the end of path.
			 */
			pathlen = snprintf(path, MAXPGPATH, "%s/%s",
							   XLogDir, xlde->d_name);
			if (pathlen < 0 || pathlen >= MAXPGPATH)
			{
				fprintf(stderr, "KillExistingXLOG: path too long: %s/%s\n",
						XLogDir, xlde->d_name);
				exit(1);
			}
			if (unlink(path) < 0)
			{
				perror(path);
				exit(1);
			}
		}
		/* reset so that a later NULL from readdir() can be told apart */
		errno = 0;
	}
	if (errno)
	{
		perror("KillExistingXLOG: cannot read $PGDATA/pg_xlog directory");
		exit(1);
	}
	closedir(xldir);
}


/*
 * Write an empty XLOG file, containing only the checkpoint record
 * already set up in ControlFile.
 *
 * The file is the segment identified by newXlogId/newXlogSeg.  Its first
 * page holds a page header followed by one shutdown-checkpoint record
 * whose body is ControlFile.checkPointCopy; the rest of the segment, up
 * to XLogSegSize bytes, is zeroes.  Any existing file of that name is
 * removed first.  Any failure terminates the program.
 */
static void
WriteEmptyXLOG(void)
{
	char	   *buffer;
	XLogPageHeader page;
	XLogRecord *record;
	crc64		crc;
	char		path[MAXPGPATH];
	int			fd;
	int			nbytes;

	/* Use malloc() to ensure buffer is MAXALIGNED */
	buffer = (char *) malloc(BLCKSZ);
	if (buffer == NULL)
	{
		fprintf(stderr, "out of memory\n");
		exit(1);
	}
	page = (XLogPageHeader) buffer;

	/* Set up the first page with initial record */
	memset(buffer, 0, BLCKSZ);
	page->xlp_magic = XLOG_PAGE_MAGIC;
	page->xlp_info = 0;
	page->xlp_sui = ControlFile.checkPointCopy.ThisStartUpID;
	page->xlp_pageaddr.xlogid =
		ControlFile.checkPointCopy.redo.xlogid;
	/* redo points just past the page header, so back up to the page start */
	page->xlp_pageaddr.xrecoff =
		ControlFile.checkPointCopy.redo.xrecoff - SizeOfXLogPHD;
	record = (XLogRecord *) ((char *) page + SizeOfXLogPHD);
	record->xl_prev.xlogid = 0;
	record->xl_prev.xrecoff = 0;
	record->xl_xact_prev = record->xl_prev;
	record->xl_xid = InvalidTransactionId;
	record->xl_len = sizeof(CheckPoint);
	record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
	record->xl_rmid = RM_XLOG_ID;
	memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
		   sizeof(CheckPoint));

	/* CRC covers the record data, then the header after its crc field */
	INIT_CRC64(crc);
	COMP_CRC64(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
	COMP_CRC64(crc, (char *) record + sizeof(crc64),
			   SizeOfXLogRecord - sizeof(crc64));
	FIN_CRC64(crc);
	record->xl_crc = crc;

	/* Write the first page */
	XLogFileName(path, newXlogId, newXlogSeg);

	unlink(path);

	fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
			  S_IRUSR | S_IWUSR);
	if (fd < 0)
	{
		perror(path);
		exit(1);
	}

	errno = 0;
	if (write(fd, buffer, BLCKSZ) != BLCKSZ)
	{
		/* if write didn't set errno, assume problem is no disk space */
		if (errno == 0)
			errno = ENOSPC;
		perror("WriteEmptyXLOG: failed to write xlog file");
		exit(1);
	}

	/* Fill the rest of the file with zeroes */
	memset(buffer, 0, BLCKSZ);
	for (nbytes = BLCKSZ; nbytes < XLogSegSize; nbytes += BLCKSZ)
	{
		errno = 0;
		if (write(fd, buffer, BLCKSZ) != BLCKSZ)
		{
			if (errno == 0)
				errno = ENOSPC;
			perror("WriteEmptyXLOG: failed to write xlog file");
			exit(1);
		}
	}

	if (fsync(fd) != 0)
	{
		perror("fsync");
		exit(1);
	}

	close(fd);

	/* page and record pointed into buffer; neither is used past here */
	free(buffer);
}


/*
 * Print the command-line synopsis on stderr and exit with status 1.
 * Does not return.
 */
static void
usage(void)
{
	fputs("Usage: pg_resetxlog [-f] [-n] PGDataDirectory\n\n"
		  "  -f\tforce update to be done\n"
		  "  -n\tno update, just show extracted pg_control values (for testing)\n",
		  stderr);
	exit(1);
}


/*
 * Program entry point.
 *
 * Parses the -f / -n switches and the single data-directory argument,
 * refuses to run while a postmaster.pid lock file exists, obtains
 * pg_control values (read from disk, or guessed), and then either just
 * reports them or rewrites pg_control and replaces the XLOG files with
 * one fresh segment.
 *
 * Exit status is 0 only when the reset was actually performed; every
 * other outcome, including a -n report, exits with status 1.
 */
int
main(int argc, char **argv)
{
	bool		force = false;
	bool		noupdate = false;
	int			argn = 1;
	int			lockfd;
	char		lockpath[MAXPGPATH];

	/* Leading arguments that start with '-' are switches */
	while (argn < argc && argv[argn][0] == '-')
	{
		const char *sw = argv[argn];

		if (strcmp(sw, "-f") == 0)
			force = true;
		else if (strcmp(sw, "-n") == 0)
			noupdate = true;
		else
			usage();
		argn++;
	}

	if (argn != argc - 1)		/* one required non-switch argument */
		usage();

	DataDir = argv[argn];

	snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir);

	snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);

	/*
	 * Check for a postmaster lock file --- if there is one, refuse to
	 * proceed, on grounds we might be interfering with a live
	 * installation.
	 */
	snprintf(lockpath, MAXPGPATH, "%s/postmaster.pid", DataDir);

	lockfd = open(lockpath, O_RDONLY);
	if (lockfd >= 0)
	{
		fprintf(stderr, "Lock file '%s' exists --- is a postmaster running?\n"
				"If not, delete the lock file and try again.\n",
				lockpath);
		exit(1);
	}
	if (errno != ENOENT)
	{
		/* The file may well exist, but we could not look at it */
		perror("Failed to open $PGDATA/postmaster.pid for reading");
		exit(1);
	}

	/*
	 * Attempt to read the existing pg_control file; fall back to default
	 * values if it is unusable.
	 */
	if (!ReadControlFile())
		GuessControlValues();

	/*
	 * If we had to guess anything, and -f was not given, just print the
	 * guessed values and exit.  Also print if -n is given.
	 */
	if (noupdate || (guessed && !force))
	{
		PrintControlValues();
		if (!noupdate)
			printf("\nIf these values seem acceptable, use -f to force reset.\n");
		exit(1);
	}

	/*
	 * Don't reset from a dirty pg_control without -f, either.
	 */
	if (!force && ControlFile.state != DB_SHUTDOWNED)
	{
		printf("The database was not shut down cleanly.\n"
			   "Resetting the xlog may cause data to be lost!\n"
			   "If you want to proceed anyway, use -f to force reset.\n");
		exit(1);
	}

	/*
	 * Else, do the dirty deed.  pg_control is rewritten first because that
	 * step chooses the new segment which WriteEmptyXLOG() then creates.
	 */
	RewriteControlFile();
	KillExistingXLOG();
	WriteEmptyXLOG();

	printf("XLOG reset.\n");
	return 0;
}