1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-15 03:41:20 +03:00

Switch to CRC-32C in WAL and other places.

The old algorithm was found to not be the usual CRC-32 algorithm, used by
Ethernet et al. We were using a non-reflected lookup table with code meant
for a reflected lookup table. That's a strange combination that AFAICS does
not correspond to any bit-wise CRC calculation, which makes it difficult to
reason about its properties. Although it has worked well in practice, it seems
safer to use a well-known algorithm.

Since we're changing the algorithm anyway, we might as well choose a
different polynomial. The Castagnoli polynomial has better error-detecting
properties than the traditional CRC-32 polynomial, even if we had
implemented it correctly. Another reason for picking that is that some new
CPUs have hardware support for calculating CRC-32C, but not CRC-32, let
alone our strange variant of it. This patch doesn't add any support for such
hardware, but a future patch could now do that.

The old algorithm is kept around for tsquery and pg_trgm, which use the
values in indexes that need to remain compatible so that pg_upgrade works.
While we're at it, share the old lookup table for CRC-32 calculation
between hstore, ltree and core. They all use the same table, so might as
well.
This commit is contained in:
Heikki Linnakangas
2014-11-04 11:35:15 +02:00
parent 404bc51cde
commit 5028f22f6e
20 changed files with 299 additions and 340 deletions

View File

@@ -847,9 +847,9 @@ TwoPhaseGetDummyProc(TransactionId xid)
* 6. TwoPhaseRecordOnDisk
* 7. ...
* 8. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID)
* 9. CRC32
* 9. checksum (CRC-32C)
*
* Each segment except the final CRC32 is MAXALIGN'd.
* Each segment except the final checksum is MAXALIGN'd.
*/
/*
@@ -1056,11 +1056,11 @@ EndPrepare(GlobalTransaction gxact)
path)));
/* Write data to file, and calculate CRC as we pass over it */
INIT_CRC32(statefile_crc);
INIT_CRC32C(statefile_crc);
for (record = records.head; record != NULL; record = record->next)
{
COMP_CRC32(statefile_crc, record->data, record->len);
COMP_CRC32C(statefile_crc, record->data, record->len);
if ((write(fd, record->data, record->len)) != record->len)
{
CloseTransientFile(fd);
@@ -1070,7 +1070,7 @@ EndPrepare(GlobalTransaction gxact)
}
}
FIN_CRC32(statefile_crc);
FIN_CRC32C(statefile_crc);
/*
* Write a deliberately bogus CRC to the state file; this is just paranoia
@@ -1289,13 +1289,13 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
return NULL;
}
INIT_CRC32(calc_crc);
COMP_CRC32(calc_crc, buf, crc_offset);
FIN_CRC32(calc_crc);
INIT_CRC32C(calc_crc);
COMP_CRC32C(calc_crc, buf, crc_offset);
FIN_CRC32C(calc_crc);
file_crc = *((pg_crc32 *) (buf + crc_offset));
if (!EQ_CRC32(calc_crc, file_crc))
if (!EQ_CRC32C(calc_crc, file_crc))
{
pfree(buf);
return NULL;
@@ -1540,9 +1540,9 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
int fd;
/* Recompute CRC */
INIT_CRC32(statefile_crc);
COMP_CRC32(statefile_crc, content, len);
FIN_CRC32(statefile_crc);
INIT_CRC32C(statefile_crc);
COMP_CRC32C(statefile_crc, content, len);
FIN_CRC32C(statefile_crc);
TwoPhaseFilePath(path, xid);

View File

@@ -1059,9 +1059,9 @@ begin:;
* the whole record in the order: rdata, then backup blocks, then record
* header.
*/
INIT_CRC32(rdata_crc);
INIT_CRC32C(rdata_crc);
for (rdt = rdata; rdt != NULL; rdt = rdt->next)
COMP_CRC32(rdata_crc, rdt->data, rdt->len);
COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
/*
* Construct record header (prev-link is filled in later, after reserving
@@ -1076,7 +1076,7 @@ begin:;
rechdr->xl_info = info;
rechdr->xl_rmid = rmid;
rechdr->xl_prev = InvalidXLogRecPtr;
COMP_CRC32(rdata_crc, ((char *) rechdr), offsetof(XLogRecord, xl_prev));
COMP_CRC32C(rdata_crc, ((char *) rechdr), offsetof(XLogRecord, xl_prev));
hdr_rdt.next = rdata;
hdr_rdt.data = (char *) rechdr;
@@ -1193,8 +1193,8 @@ begin:;
* Now that xl_prev has been filled in, finish CRC calculation of the
* record header.
*/
COMP_CRC32(rdata_crc, ((char *) &rechdr->xl_prev), sizeof(XLogRecPtr));
FIN_CRC32(rdata_crc);
COMP_CRC32C(rdata_crc, ((char *) &rechdr->xl_prev), sizeof(XLogRecPtr));
FIN_CRC32C(rdata_crc);
rechdr->xl_crc = rdata_crc;
/*
@@ -4344,11 +4344,11 @@ WriteControlFile(void)
ControlFile->float8ByVal = FLOAT8PASSBYVAL;
/* Contents are protected with a CRC */
INIT_CRC32(ControlFile->crc);
COMP_CRC32(ControlFile->crc,
(char *) ControlFile,
offsetof(ControlFileData, crc));
FIN_CRC32(ControlFile->crc);
INIT_CRC32C(ControlFile->crc);
COMP_CRC32C(ControlFile->crc,
(char *) ControlFile,
offsetof(ControlFileData, crc));
FIN_CRC32C(ControlFile->crc);
/*
* We write out PG_CONTROL_SIZE bytes into pg_control, zero-padding the
@@ -4444,13 +4444,13 @@ ReadControlFile(void)
errhint("It looks like you need to initdb.")));
/* Now check the CRC. */
INIT_CRC32(crc);
COMP_CRC32(crc,
(char *) ControlFile,
offsetof(ControlFileData, crc));
FIN_CRC32(crc);
INIT_CRC32C(crc);
COMP_CRC32C(crc,
(char *) ControlFile,
offsetof(ControlFileData, crc));
FIN_CRC32C(crc);
if (!EQ_CRC32(crc, ControlFile->crc))
if (!EQ_CRC32C(crc, ControlFile->crc))
ereport(FATAL,
(errmsg("incorrect checksum in control file")));
@@ -4593,11 +4593,11 @@ UpdateControlFile(void)
{
int fd;
INIT_CRC32(ControlFile->crc);
COMP_CRC32(ControlFile->crc,
(char *) ControlFile,
offsetof(ControlFileData, crc));
FIN_CRC32(ControlFile->crc);
INIT_CRC32C(ControlFile->crc);
COMP_CRC32C(ControlFile->crc,
(char *) ControlFile,
offsetof(ControlFileData, crc));
FIN_CRC32C(ControlFile->crc);
fd = BasicOpenFile(XLOG_CONTROL_FILE,
O_RDWR | PG_BINARY,
@@ -4975,10 +4975,10 @@ BootStrapXLOG(void)
record->xl_rmid = RM_XLOG_ID;
memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint));
INIT_CRC32(crc);
COMP_CRC32(crc, &checkPoint, sizeof(checkPoint));
COMP_CRC32(crc, (char *) record, offsetof(XLogRecord, xl_crc));
FIN_CRC32(crc);
INIT_CRC32C(crc);
COMP_CRC32C(crc, &checkPoint, sizeof(checkPoint));
COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
FIN_CRC32C(crc);
record->xl_crc = crc;
/* Create first XLOG segment file */

View File

@@ -684,8 +684,8 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
return false;
}
remaining -= SizeOfXLogRecord + len;
INIT_CRC32(crc);
COMP_CRC32(crc, XLogRecGetData(record), len);
INIT_CRC32C(crc);
COMP_CRC32C(crc, XLogRecGetData(record), len);
/* Add in the backup blocks, if any */
blk = (char *) XLogRecGetData(record) + len;
@@ -722,7 +722,7 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
return false;
}
remaining -= blen;
COMP_CRC32(crc, blk, blen);
COMP_CRC32C(crc, blk, blen);
blk += blen;
}
@@ -736,10 +736,10 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
}
/* Finally include the record header */
COMP_CRC32(crc, (char *) record, offsetof(XLogRecord, xl_crc));
FIN_CRC32(crc);
COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
FIN_CRC32C(crc);
if (!EQ_CRC32(record->xl_crc, crc))
if (!EQ_CRC32C(record->xl_crc, crc))
{
report_invalid_record(state,
"incorrect resource manager data checksum in record at %X/%X",

View File

@@ -1517,9 +1517,9 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
ondisk->magic = SNAPBUILD_MAGIC;
ondisk->version = SNAPBUILD_VERSION;
ondisk->length = needed_length;
INIT_CRC32(ondisk->checksum);
COMP_CRC32(ondisk->checksum,
((char *) ondisk) + SnapBuildOnDiskNotChecksummedSize,
INIT_CRC32C(ondisk->checksum);
COMP_CRC32C(ondisk->checksum,
((char *) ondisk) + SnapBuildOnDiskNotChecksummedSize,
SnapBuildOnDiskConstantSize - SnapBuildOnDiskNotChecksummedSize);
ondisk_c += sizeof(SnapBuildOnDisk);
@@ -1531,20 +1531,20 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
ondisk->builder.running.xip = NULL;
ondisk->builder.committed.xip = NULL;
COMP_CRC32(ondisk->checksum,
&ondisk->builder,
sizeof(SnapBuild));
COMP_CRC32C(ondisk->checksum,
&ondisk->builder,
sizeof(SnapBuild));
/* copy running xacts */
sz = sizeof(TransactionId) * builder->running.xcnt_space;
memcpy(ondisk_c, builder->running.xip, sz);
COMP_CRC32(ondisk->checksum, ondisk_c, sz);
COMP_CRC32C(ondisk->checksum, ondisk_c, sz);
ondisk_c += sz;
/* copy committed xacts */
sz = sizeof(TransactionId) * builder->committed.xcnt;
memcpy(ondisk_c, builder->committed.xip, sz);
COMP_CRC32(ondisk->checksum, ondisk_c, sz);
COMP_CRC32C(ondisk->checksum, ondisk_c, sz);
ondisk_c += sz;
/* we have valid data now, open tempfile and write it there */
@@ -1672,8 +1672,8 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
(errmsg("snapbuild state file \"%s\" has unsupported version %u instead of %u",
path, ondisk.version, SNAPBUILD_VERSION)));
INIT_CRC32(checksum);
COMP_CRC32(checksum,
INIT_CRC32C(checksum);
COMP_CRC32C(checksum,
((char *) &ondisk) + SnapBuildOnDiskNotChecksummedSize,
SnapBuildOnDiskConstantSize - SnapBuildOnDiskNotChecksummedSize);
@@ -1687,7 +1687,7 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
errmsg("could not read file \"%s\", read %d of %d: %m",
path, readBytes, (int) sizeof(SnapBuild))));
}
COMP_CRC32(checksum, &ondisk.builder, sizeof(SnapBuild));
COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
/* restore running xacts information */
sz = sizeof(TransactionId) * ondisk.builder.running.xcnt_space;
@@ -1701,7 +1701,7 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
errmsg("could not read file \"%s\", read %d of %d: %m",
path, readBytes, (int) sz)));
}
COMP_CRC32(checksum, ondisk.builder.running.xip, sz);
COMP_CRC32C(checksum, ondisk.builder.running.xip, sz);
/* restore committed xacts information */
sz = sizeof(TransactionId) * ondisk.builder.committed.xcnt;
@@ -1715,12 +1715,12 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
errmsg("could not read file \"%s\", read %d of %d: %m",
path, readBytes, (int) sz)));
}
COMP_CRC32(checksum, ondisk.builder.committed.xip, sz);
COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
CloseTransientFile(fd);
/* verify checksum of what we've read */
if (!EQ_CRC32(checksum, ondisk.checksum))
if (!EQ_CRC32C(checksum, ondisk.checksum))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("snapbuild state file %s: checksum mismatch, is %u, should be %u",

View File

@@ -993,7 +993,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
}
cp.magic = SLOT_MAGIC;
INIT_CRC32(cp.checksum);
INIT_CRC32C(cp.checksum);
cp.version = 1;
cp.length = ReplicationSlotOnDiskDynamicSize;
@@ -1003,9 +1003,9 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
SpinLockRelease(&slot->mutex);
COMP_CRC32(cp.checksum,
(char *) (&cp) + ReplicationSlotOnDiskConstantSize,
ReplicationSlotOnDiskDynamicSize);
COMP_CRC32C(cp.checksum,
(char *) (&cp) + ReplicationSlotOnDiskConstantSize,
ReplicationSlotOnDiskDynamicSize);
if ((write(fd, &cp, sizeof(cp))) != sizeof(cp))
{
@@ -1181,13 +1181,13 @@ RestoreSlotFromDisk(const char *name)
CloseTransientFile(fd);
/* now verify the CRC32 */
INIT_CRC32(checksum);
COMP_CRC32(checksum,
(char *) &cp + ReplicationSlotOnDiskConstantSize,
ReplicationSlotOnDiskDynamicSize);
/* now verify the CRC */
INIT_CRC32C(checksum);
COMP_CRC32C(checksum,
(char *) &cp + ReplicationSlotOnDiskConstantSize,
ReplicationSlotOnDiskDynamicSize);
if (!EQ_CRC32(checksum, cp.checksum))
if (!EQ_CRC32C(checksum, cp.checksum))
ereport(PANIC,
(errmsg("replication slot file %s: checksum mismatch, is %u, should be %u",
path, checksum, cp.checksum)));

View File

@@ -201,9 +201,9 @@ gtsvector_compress(PG_FUNCTION_ARGS)
{
pg_crc32 c;
INIT_CRC32(c);
COMP_CRC32(c, words + ptr->pos, ptr->len);
FIN_CRC32(c);
INIT_LEGACY_CRC32(c);
COMP_LEGACY_CRC32(c, words + ptr->pos, ptr->len);
FIN_LEGACY_CRC32(c);
*arr = *(int32 *) &c;
arr++;

View File

@@ -280,9 +280,9 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool
errmsg("word is too long in tsquery: \"%s\"",
state->buffer)));
INIT_CRC32(valcrc);
COMP_CRC32(valcrc, strval, lenval);
FIN_CRC32(valcrc);
INIT_LEGACY_CRC32(valcrc);
COMP_LEGACY_CRC32(valcrc, strval, lenval);
FIN_LEGACY_CRC32(valcrc);
pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
/* append the value string to state.op, enlarging buffer if needed first */
@@ -883,9 +883,9 @@ tsqueryrecv(PG_FUNCTION_ARGS)
/* Looks valid. */
INIT_CRC32(valcrc);
COMP_CRC32(valcrc, val, val_len);
FIN_CRC32(valcrc);
INIT_LEGACY_CRC32(valcrc);
COMP_LEGACY_CRC32(valcrc, val, val_len);
FIN_LEGACY_CRC32(valcrc);
item->qoperand.weight = weight;
item->qoperand.prefix = (prefix) ? true : false;

View File

@@ -84,7 +84,7 @@ typedef struct RelMapFile
int32 magic; /* always RELMAPPER_FILEMAGIC */
int32 num_mappings; /* number of valid RelMapping entries */
RelMapping mappings[MAX_MAPPINGS];
int32 crc; /* CRC of all above */
pg_crc32 crc; /* CRC of all above */
int32 pad; /* to make the struct size be 512 exactly */
} RelMapFile;
@@ -673,11 +673,11 @@ load_relmap_file(bool shared)
mapfilename)));
/* verify the CRC */
INIT_CRC32(crc);
COMP_CRC32(crc, (char *) map, offsetof(RelMapFile, crc));
FIN_CRC32(crc);
INIT_CRC32C(crc);
COMP_CRC32C(crc, (char *) map, offsetof(RelMapFile, crc));
FIN_CRC32C(crc);
if (!EQ_CRC32(crc, map->crc))
if (!EQ_CRC32C(crc, map->crc))
ereport(FATAL,
(errmsg("relation mapping file \"%s\" contains incorrect checksum",
mapfilename)));
@@ -719,9 +719,9 @@ write_relmap_file(bool shared, RelMapFile *newmap,
if (newmap->num_mappings < 0 || newmap->num_mappings > MAX_MAPPINGS)
elog(ERROR, "attempt to write bogus relation mapping");
INIT_CRC32(newmap->crc);
COMP_CRC32(newmap->crc, (char *) newmap, offsetof(RelMapFile, crc));
FIN_CRC32(newmap->crc);
INIT_CRC32C(newmap->crc);
COMP_CRC32C(newmap->crc, (char *) newmap, offsetof(RelMapFile, crc));
FIN_CRC32C(newmap->crc);
/*
* Open the target file. We prefer to do this before entering the