mirror of
https://github.com/postgres/postgres.git
synced 2025-05-11 05:41:32 +03:00
353 lines
7.9 KiB
C
353 lines
7.9 KiB
C
/*
|
|
* file.c
|
|
*
|
|
* file system operations
|
|
*
|
|
* Copyright (c) 2010-2016, PostgreSQL Global Development Group
|
|
* src/bin/pg_upgrade/file.c
|
|
*/
|
|
|
|
#include "postgres_fe.h"
|
|
|
|
#include "access/visibilitymap.h"
|
|
#include "pg_upgrade.h"
|
|
#include "storage/bufpage.h"
|
|
#include "storage/checksum.h"
|
|
#include "storage/checksum_impl.h"
|
|
|
|
#include <sys/stat.h>
|
|
#include <fcntl.h>
|
|
|
|
#define BITS_PER_HEAPBLOCK_OLD 1
|
|
|
|
|
|
#ifndef WIN32
|
|
static int copy_file(const char *fromfile, const char *tofile);
|
|
#else
|
|
static int win32_pghardlink(const char *src, const char *dst);
|
|
#endif
|
|
|
|
|
|
/*
|
|
* copyFile()
|
|
*
|
|
* Copies a relation file from src to dst.
|
|
*/
|
|
const char *
|
|
copyFile(const char *src, const char *dst)
|
|
{
|
|
#ifndef WIN32
|
|
if (copy_file(src, dst) == -1)
|
|
#else
|
|
if (CopyFile(src, dst, true) == 0)
|
|
#endif
|
|
return getErrorText();
|
|
else
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/*
|
|
* linkFile()
|
|
*
|
|
* Creates a hard link between the given relation files. We use
|
|
* this function to perform a true in-place update. If the on-disk
|
|
* format of the new cluster is bit-for-bit compatible with the on-disk
|
|
* format of the old cluster, we can simply link each relation
|
|
* instead of copying the data from the old cluster to the new cluster.
|
|
*/
|
|
const char *
|
|
linkFile(const char *src, const char *dst)
|
|
{
|
|
if (pg_link_file(src, dst) == -1)
|
|
return getErrorText();
|
|
else
|
|
return NULL;
|
|
}
|
|
|
|
|
|
#ifndef WIN32
|
|
static int
|
|
copy_file(const char *srcfile, const char *dstfile)
|
|
{
|
|
#define COPY_BUF_SIZE (50 * BLCKSZ)
|
|
|
|
int src_fd;
|
|
int dest_fd;
|
|
char *buffer;
|
|
int ret = 0;
|
|
int save_errno = 0;
|
|
|
|
if ((srcfile == NULL) || (dstfile == NULL))
|
|
{
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if ((src_fd = open(srcfile, O_RDONLY, 0)) < 0)
|
|
return -1;
|
|
|
|
if ((dest_fd = open(dstfile, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR)) < 0)
|
|
{
|
|
save_errno = errno;
|
|
|
|
if (src_fd != 0)
|
|
close(src_fd);
|
|
|
|
errno = save_errno;
|
|
return -1;
|
|
}
|
|
|
|
buffer = (char *) pg_malloc(COPY_BUF_SIZE);
|
|
|
|
/* perform data copying i.e read src source, write to destination */
|
|
while (true)
|
|
{
|
|
ssize_t nbytes = read(src_fd, buffer, COPY_BUF_SIZE);
|
|
|
|
if (nbytes < 0)
|
|
{
|
|
save_errno = errno;
|
|
ret = -1;
|
|
break;
|
|
}
|
|
|
|
if (nbytes == 0)
|
|
break;
|
|
|
|
errno = 0;
|
|
|
|
if (write(dest_fd, buffer, nbytes) != nbytes)
|
|
{
|
|
/* if write didn't set errno, assume problem is no disk space */
|
|
if (errno == 0)
|
|
errno = ENOSPC;
|
|
save_errno = errno;
|
|
ret = -1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
pg_free(buffer);
|
|
|
|
if (src_fd != 0)
|
|
close(src_fd);
|
|
|
|
if (dest_fd != 0)
|
|
close(dest_fd);
|
|
|
|
if (save_errno != 0)
|
|
errno = save_errno;
|
|
|
|
return ret;
|
|
}
|
|
#endif
|
|
|
|
|
|
/*
|
|
* rewriteVisibilityMap()
|
|
*
|
|
* In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
|
|
* visibility map included one bit per heap page; it now includes two.
|
|
* When upgrading a cluster from before that time to a current PostgreSQL
|
|
* version, we could refuse to copy visibility maps from the old cluster
|
|
* to the new cluster; the next VACUUM would recreate them, but at the
|
|
* price of scanning the entire table. So, instead, we rewrite the old
|
|
* visibility maps in the new format. That way, the all-visible bit
|
|
* remains set for the pages for which it was set previously. The
|
|
* all-frozen bit is never set by this conversion; we leave that to
|
|
* VACUUM.
|
|
*/
|
|
const char *
|
|
rewriteVisibilityMap(const char *fromfile, const char *tofile)
|
|
{
|
|
int src_fd = 0;
|
|
int dst_fd = 0;
|
|
char buffer[BLCKSZ];
|
|
ssize_t bytesRead;
|
|
ssize_t totalBytesRead = 0;
|
|
ssize_t src_filesize;
|
|
int rewriteVmBytesPerPage;
|
|
BlockNumber new_blkno = 0;
|
|
struct stat statbuf;
|
|
|
|
/* Compute we need how many old page bytes to rewrite a new page */
|
|
rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
|
|
|
|
if ((fromfile == NULL) || (tofile == NULL))
|
|
return "Invalid old file or new file";
|
|
|
|
if ((src_fd = open(fromfile, O_RDONLY, 0)) < 0)
|
|
return getErrorText();
|
|
|
|
if (fstat(src_fd, &statbuf) != 0)
|
|
{
|
|
close(src_fd);
|
|
return getErrorText();
|
|
}
|
|
|
|
if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR)) < 0)
|
|
{
|
|
close(src_fd);
|
|
return getErrorText();
|
|
}
|
|
|
|
/* Save old file size */
|
|
src_filesize = statbuf.st_size;
|
|
|
|
/*
|
|
* Turn each visibility map page into 2 pages one by one. Each new page
|
|
* has the same page header as the old one. If the last section of last
|
|
* page is empty, we skip it, mostly to avoid turning one-page visibility
|
|
* maps for small relations into two pages needlessly.
|
|
*/
|
|
while (totalBytesRead < src_filesize)
|
|
{
|
|
char *old_cur;
|
|
char *old_break;
|
|
char *old_blkend;
|
|
PageHeaderData pageheader;
|
|
bool old_lastblk;
|
|
|
|
if ((bytesRead = read(src_fd, buffer, BLCKSZ)) != BLCKSZ)
|
|
{
|
|
close(dst_fd);
|
|
close(src_fd);
|
|
return getErrorText();
|
|
}
|
|
|
|
totalBytesRead += BLCKSZ;
|
|
old_lastblk = (totalBytesRead == src_filesize);
|
|
|
|
/* Save the page header data */
|
|
memcpy(&pageheader, buffer, SizeOfPageHeaderData);
|
|
|
|
/*
|
|
* These old_* variables point to old visibility map page. old_cur
|
|
* points to current position on old page. old_blkend points to end of
|
|
* old block. old_break points to old page break position for
|
|
* rewriting a new page. After wrote a new page, old_break proceeds
|
|
* rewriteVmBytesPerPage bytes.
|
|
*/
|
|
old_cur = buffer + SizeOfPageHeaderData;
|
|
old_blkend = buffer + bytesRead;
|
|
old_break = old_cur + rewriteVmBytesPerPage;
|
|
|
|
while (old_blkend >= old_break)
|
|
{
|
|
char new_vmbuf[BLCKSZ];
|
|
char *new_cur = new_vmbuf;
|
|
bool empty = true;
|
|
bool old_lastpart;
|
|
|
|
/* Copy page header in advance */
|
|
memcpy(new_vmbuf, &pageheader, SizeOfPageHeaderData);
|
|
|
|
/* Rewrite the last part of the old page? */
|
|
old_lastpart = old_lastblk && (old_blkend == old_break);
|
|
|
|
new_cur += SizeOfPageHeaderData;
|
|
|
|
/* Process old page bytes one by one, and turn it into new page. */
|
|
while (old_break > old_cur)
|
|
{
|
|
uint16 new_vmbits = 0;
|
|
int i;
|
|
|
|
/* Generate new format bits while keeping old information */
|
|
for (i = 0; i < BITS_PER_BYTE; i++)
|
|
{
|
|
uint8 byte = *(uint8 *) old_cur;
|
|
|
|
if (byte & (1 << (BITS_PER_HEAPBLOCK_OLD * i)))
|
|
{
|
|
empty = false;
|
|
new_vmbits |= 1 << (BITS_PER_HEAPBLOCK * i);
|
|
}
|
|
}
|
|
|
|
/* Copy new visibility map bit to new format page */
|
|
memcpy(new_cur, &new_vmbits, BITS_PER_HEAPBLOCK);
|
|
|
|
old_cur += BITS_PER_HEAPBLOCK_OLD;
|
|
new_cur += BITS_PER_HEAPBLOCK;
|
|
}
|
|
|
|
/* If the last part of the old page is empty, skip writing it */
|
|
if (old_lastpart && empty)
|
|
break;
|
|
|
|
/* Set new checksum for a visibility map page (if enabled) */
|
|
if (old_cluster.controldata.data_checksum_version != 0 &&
|
|
new_cluster.controldata.data_checksum_version != 0)
|
|
((PageHeader) new_vmbuf)->pd_checksum =
|
|
pg_checksum_page(new_vmbuf, new_blkno);
|
|
|
|
if (write(dst_fd, new_vmbuf, BLCKSZ) != BLCKSZ)
|
|
{
|
|
close(dst_fd);
|
|
close(src_fd);
|
|
return getErrorText();
|
|
}
|
|
|
|
old_break += rewriteVmBytesPerPage;
|
|
new_blkno++;
|
|
}
|
|
}
|
|
|
|
/* Close files */
|
|
close(dst_fd);
|
|
close(src_fd);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
void
|
|
check_hard_link(void)
|
|
{
|
|
char existing_file[MAXPGPATH];
|
|
char new_link_file[MAXPGPATH];
|
|
|
|
snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
|
|
snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.linktest", new_cluster.pgdata);
|
|
unlink(new_link_file); /* might fail */
|
|
|
|
if (pg_link_file(existing_file, new_link_file) == -1)
|
|
{
|
|
pg_fatal("Could not create hard link between old and new data directories: %s\n"
|
|
"In link mode the old and new data directories must be on the same file system volume.\n",
|
|
getErrorText());
|
|
}
|
|
unlink(new_link_file);
|
|
}
|
|
|
|
#ifdef WIN32
|
|
static int
|
|
win32_pghardlink(const char *src, const char *dst)
|
|
{
|
|
/*
|
|
* CreateHardLinkA returns zero for failure
|
|
* http://msdn.microsoft.com/en-us/library/aa363860(VS.85).aspx
|
|
*/
|
|
if (CreateHardLinkA(dst, src, NULL) == 0)
|
|
return -1;
|
|
else
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
|
|
/* fopen() file with no group/other permissions */
|
|
FILE *
|
|
fopen_priv(const char *path, const char *mode)
|
|
{
|
|
mode_t old_umask = umask(S_IRWXG | S_IRWXO);
|
|
FILE *fp;
|
|
|
|
fp = fopen(path, mode);
|
|
umask(old_umask);
|
|
|
|
return fp;
|
|
}
|