diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index a275eae5c6a..27d36fddd48 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -262,12 +262,12 @@ PostgreSQL documentation Output a tar-format archive suitable for input - into pg_restore. The tar-format is - compatible with the directory-format; extracting a tar-format + into pg_restore. The tar format is + compatible with the directory format: extracting a tar-format archive produces a valid directory-format archive. - However, the tar-format does not support compression and has a - limit of 8 GB on the size of individual tables. Also, the relative - order of table data items cannot be changed during restore. + However, the tar format does not support compression. Also, when + using tar format the relative order of table data items cannot be + changed during restore. @@ -986,15 +986,6 @@ CREATE DATABASE foo WITH TEMPLATE template0; catalogs might be left in the wrong state. - - Members of tar archives are limited to a size less than 8 GB. - (This is an inherent limitation of the tar file format.) Therefore - this format cannot be used if the textual representation of any one table - exceeds that size. The total size of a tar archive and any of the - other output formats is not limited, except possibly by the - operating system. 
- - The dump file produced by pg_dump does not contain the statistics used by the optimizer to make diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 0e5f070bdfc..c70b3c8530b 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -34,6 +34,7 @@ #include "utils/elog.h" #include "utils/memutils.h" #include "utils/ps_status.h" +#include "pgtar.h" typedef struct { @@ -1016,49 +1017,6 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces) */ -/* - * Utility routine to print possibly larger than 32 bit integers in a - * portable fashion. Filled with zeros. - */ -static void -print_val(char *s, uint64 val, unsigned int base, size_t len) -{ - int i; - - for (i = len; i > 0; i--) - { - int digit = val % base; - - s[i - 1] = '0' + digit; - val = val / base; - } -} - -/* - * Maximum file size for a tar member: The limit inherent in the - * format is 2^33-1 bytes (nearly 8 GB). But we don't want to exceed - * what we can represent in pgoff_t. - */ -#define MAX_TAR_MEMBER_FILELEN (((int64) 1 << Min(33, sizeof(pgoff_t)*8 - 1)) - 1) - -static int -_tarChecksum(char *header) -{ - int i, - sum; - - /* - * Per POSIX, the checksum is the simple sum of all bytes in the header, - * treating the bytes as unsigned, and treating the checksum field (at - * offset 148) as though it contained 8 spaces. - */ - sum = 8 * ' '; /* presumed value for checksum field */ - for (i = 0; i < 512; i++) - if (i < 148 || i >= 156) - sum += 0xFF & header[i]; - return sum; -} - /* * Given the member, write the TAR header & send the file. * @@ -1087,15 +1045,6 @@ sendFile(char *readfilename, char *tarfilename, struct stat *statbuf, errmsg("could not open file \"%s\": %m", readfilename))); } - /* - * Some compilers will throw a warning knowing this test can never be true - * because pgoff_t can't exceed the compared maximum on their platform. 
- */ - if (statbuf->st_size > MAX_TAR_MEMBER_FILELEN) - ereport(ERROR, - (errmsg("archive member \"%s\" too large for tar format", - tarfilename))); - _tarWriteHeader(tarfilename, NULL, statbuf); while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0) @@ -1150,95 +1099,10 @@ _tarWriteHeader(const char *filename, const char *linktarget, { char h[512]; - /* - * Note: most of the fields in a tar header are not supposed to be - * null-terminated. We use sprintf, which will write a null after the - * required bytes; that null goes into the first byte of the next field. - * This is okay as long as we fill the fields in order. - */ - memset(h, 0, sizeof(h)); + tarCreateHeader(h, filename, linktarget, statbuf->st_size, + statbuf->st_mode, statbuf->st_uid, statbuf->st_gid, + statbuf->st_mtime, + false /* write real POSIX header */); - /* Name 100 */ - strlcpy(&h[0], filename, 100); - if (linktarget != NULL || S_ISDIR(statbuf->st_mode)) - { - /* - * We only support symbolic links to directories, and this is - * indicated in the tar format by adding a slash at the end of the - * name, the same as for regular directories. 
- */ - int flen = strlen(filename); - - flen = Min(flen, 99); - h[flen] = '/'; - h[flen + 1] = '\0'; - } - - /* Mode 8 */ - sprintf(&h[100], "%07o ", (int) statbuf->st_mode); - - /* User ID 8 */ - sprintf(&h[108], "%07o ", statbuf->st_uid); - - /* Group 8 */ - sprintf(&h[116], "%07o ", statbuf->st_gid); - - /* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */ - if (linktarget != NULL || S_ISDIR(statbuf->st_mode)) - /* Symbolic link or directory has size zero */ - print_val(&h[124], 0, 8, 11); - else - print_val(&h[124], statbuf->st_size, 8, 11); - sprintf(&h[135], " "); - - /* Mod Time 12 */ - sprintf(&h[136], "%011o ", (int) statbuf->st_mtime); - - /* Checksum 8 cannot be calculated until we've filled all other fields */ - - if (linktarget != NULL) - { - /* Type - Symbolic link */ - sprintf(&h[156], "2"); - /* Link Name 100 */ - strlcpy(&h[157], linktarget, 100); - } - else if (S_ISDIR(statbuf->st_mode)) - /* Type - directory */ - sprintf(&h[156], "5"); - else - /* Type - regular file */ - sprintf(&h[156], "0"); - - /* Magic 6 */ - sprintf(&h[257], "ustar"); - - /* Version 2 */ - sprintf(&h[263], "00"); - - /* User 32 */ - /* XXX: Do we need to care about setting correct username? */ - strlcpy(&h[265], "postgres", 32); - - /* Group 32 */ - /* XXX: Do we need to care about setting correct group name? */ - strlcpy(&h[297], "postgres", 32); - - /* Major Dev 8 */ - sprintf(&h[329], "%07o ", 0); - - /* Minor Dev 8 */ - sprintf(&h[337], "%07o ", 0); - - /* Prefix 155 - not used, leave as nulls */ - - /* - * We mustn't overwrite the next field while inserting the checksum. - * Fortunately, the checksum can't exceed 6 octal digits, so we just write - * 6 digits, a space, and a null, which is legal per POSIX. - */ - sprintf(&h[148], "%06o ", _tarChecksum(h)); - - /* Now send the completed header. 
*/ pq_putmessage('d', h, 512); } diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 7a3e82983f6..c8c6533ab40 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -19,6 +19,7 @@ #define FRONTEND 1 #include "postgres.h" #include "libpq-fe.h" +#include "pgtar.h" #include #include @@ -757,7 +758,7 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum) { char current_path[MAXPGPATH]; char filename[MAXPGPATH]; - int current_len_left; + pgoff_t current_len_left = 0; int current_padding = 0; char *copybuf = NULL; FILE *file = NULL; @@ -822,20 +823,10 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum) } totaldone += 512; - if (sscanf(copybuf + 124, "%11o", &current_len_left) != 1) - { - fprintf(stderr, _("%s: could not parse file size\n"), - progname); - disconnect_and_exit(1); - } + current_len_left = read_tar_number(&copybuf[124], 12); /* Set permissions on the file */ - if (sscanf(&copybuf[100], "%07o ", &filemode) != 1) - { - fprintf(stderr, _("%s: could not parse file mode\n"), - progname); - disconnect_and_exit(1); - } + filemode = read_tar_number(&copybuf[100], 8); /* * All files are padded up to 512 bytes diff --git a/src/bin/pg_dump/pg_backup_tar.c b/src/bin/pg_dump/pg_backup_tar.c index 54ca16600c5..f05ea888a50 100644 --- a/src/bin/pg_dump/pg_backup_tar.c +++ b/src/bin/pg_dump/pg_backup_tar.c @@ -30,6 +30,7 @@ #include "pg_backup_tar.h" #include "dumpmem.h" #include "dumputils.h" +#include "pgtar.h" #include #include @@ -75,13 +76,6 @@ typedef struct ArchiveHandle *AH; } TAR_MEMBER; -/* - * Maximum file size for a tar member: The limit inherent in the - * format is 2^33-1 bytes (nearly 8 GB). But we don't want to exceed - * what we can represent in pgoff_t. 
- */ -#define MAX_TAR_MEMBER_FILELEN (((int64) 1 << Min(33, sizeof(pgoff_t)*8 - 1)) - 1) - typedef struct { int hasSeek; @@ -115,7 +109,6 @@ static char *tarGets(char *buf, size_t len, TAR_MEMBER *th); static int tarPrintf(ArchiveHandle *AH, TAR_MEMBER *th, const char *fmt,...) __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4))); static void _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th); -static int _tarChecksum(char *th); static TAR_MEMBER *_tarPositionTo(ArchiveHandle *AH, const char *filename); static size_t tarRead(void *buf, size_t len, TAR_MEMBER *th); static size_t tarWrite(const void *buf, size_t len, TAR_MEMBER *th); @@ -1021,31 +1014,13 @@ tarPrintf(ArchiveHandle *AH, TAR_MEMBER *th, const char *fmt,...) return cnt; } -static int -_tarChecksum(char *header) -{ - int i, - sum; - - /* - * Per POSIX, the checksum is the simple sum of all bytes in the header, - * treating the bytes as unsigned, and treating the checksum field (at - * offset 148) as though it contained 8 spaces. - */ - sum = 8 * ' '; /* presumed value for checksum field */ - for (i = 0; i < 512; i++) - if (i < 148 || i >= 156) - sum += 0xFF & header[i]; - return sum; -} - bool isValidTarHeader(char *header) { int sum; - int chk = _tarChecksum(header); + int chk = tarChecksum(header); - sscanf(&header[148], "%8o", &sum); + sum = read_tar_number(&header[148], 8); if (sum != chk) return false; @@ -1084,13 +1059,6 @@ _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th) th->fileLen = ftello(tmp); fseeko(tmp, 0, SEEK_SET); - /* - * Some compilers will throw a warning knowing this test can never be true - * because pgoff_t can't exceed the compared maximum on their platform. 
- */ - if (th->fileLen > MAX_TAR_MEMBER_FILELEN) - exit_horribly(modulename, "archive member too large for tar format\n"); - _tarWriteHeader(th); while ((cnt = fread(buf, 1, sizeof(buf), tmp)) > 0) @@ -1216,11 +1184,10 @@ _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th) { lclContext *ctx = (lclContext *) AH->formatData; char h[512]; - char tag[100]; + char tag[100 + 1]; int sum, chk; - size_t len; - unsigned long ullen; + pgoff_t len; pgoff_t hPos; bool gotBlock = false; @@ -1256,8 +1223,8 @@ _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th) (unsigned long) len); /* Calc checksum */ - chk = _tarChecksum(h); - sscanf(&h[148], "%8o", &sum); + chk = tarChecksum(h); + sum = read_tar_number(&h[148], 8); /* * If the checksum failed, see if it is a null block. If so, silently @@ -1280,27 +1247,31 @@ _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th) } } - sscanf(&h[0], "%99s", tag); - sscanf(&h[124], "%12lo", &ullen); - len = (size_t) ullen; + /* Name field is 100 bytes, might not be null-terminated */ + strlcpy(tag, &h[0], 100 + 1); + + len = read_tar_number(&h[124], 12); { - char buf[100]; + char posbuf[32]; + char lenbuf[32]; - snprintf(buf, sizeof(buf), INT64_FORMAT, (int64) hPos); - ahlog(AH, 3, "TOC Entry %s at %s (length %lu, checksum %d)\n", - tag, buf, (unsigned long) len, sum); + snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT, (uint64) hPos); + snprintf(lenbuf, sizeof(lenbuf), UINT64_FORMAT, (uint64) len); + ahlog(AH, 3, "TOC Entry %s at %s (length %s, checksum %d)\n", + tag, posbuf, lenbuf, sum); } if (chk != sum) { - char buf[100]; + char posbuf[32]; - snprintf(buf, sizeof(buf), INT64_FORMAT, (int64) ftello(ctx->tarFH)); + snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT, + (uint64) ftello(ctx->tarFH)); exit_horribly(modulename, "corrupt tar header found in %s " "(expected %d, computed %d) file position %s\n", - tag, sum, chk, buf); + tag, sum, chk, posbuf); } th->targetFile = pg_strdup(tag); @@ -1310,86 +1281,16 @@ _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th) 
} -/* - * Utility routine to print possibly larger than 32 bit integers in a - * portable fashion. Filled with zeros. - */ -static void -print_val(char *s, uint64 val, unsigned int base, size_t len) -{ - int i; - - for (i = len; i > 0; i--) - { - int digit = val % base; - - s[i - 1] = '0' + digit; - val = val / base; - } -} - - static void _tarWriteHeader(TAR_MEMBER *th) { char h[512]; - int lastSum = 0; - int sum; - memset(h, 0, sizeof(h)); - - /* Name 100 */ - sprintf(&h[0], "%.99s", th->targetFile); - - /* Mode 8 */ - sprintf(&h[100], "100600 "); - - /* User ID 8 */ - sprintf(&h[108], "004000 "); - - /* Group 8 */ - sprintf(&h[116], "002000 "); - - /* File size 12 - 11 digits, 1 space, no NUL */ - print_val(&h[124], th->fileLen, 8, 11); - sprintf(&h[135], " "); - - /* Mod Time 12 */ - sprintf(&h[136], "%011o ", (int) time(NULL)); - - /* Checksum 8 */ - sprintf(&h[148], "%06o ", lastSum); - - /* Type - regular file */ - sprintf(&h[156], "0"); - - /* Link tag 100 (NULL) */ - - /* Magic 6 + Version 2 */ - sprintf(&h[257], "ustar00"); - -#if 0 - /* User 32 */ - sprintf(&h[265], "%.31s", ""); /* How do I get username reliably? Do - * I need to? */ - - /* Group 32 */ - sprintf(&h[297], "%.31s", ""); /* How do I get group reliably? Do I - * need to? */ - - /* Maj Dev 8 */ - sprintf(&h[329], "%6o ", 0); - - /* Min Dev 8 */ - sprintf(&h[337], "%6o ", 0); -#endif - - while ((sum = _tarChecksum(h)) != lastSum) - { - sprintf(&h[148], "%06o ", sum); - lastSum = sum; - } + tarCreateHeader(h, th->targetFile, NULL, th->fileLen, + 0600, 04000, 02000, time(NULL), + true /* backwards compatible format */); + /* Now write the completed header. 
*/ if (fwrite(h, 1, 512, th->tarFH) != 512) exit_horribly(modulename, "could not write to output file: %s\n", strerror(errno)); } diff --git a/src/include/pgtar.h b/src/include/pgtar.h new file mode 100644 index 00000000000..45694d99598 --- /dev/null +++ b/src/include/pgtar.h @@ -0,0 +1,18 @@ +/*------------------------------------------------------------------------- + * + * pgtar.h + * Functions for manipulating tarfile datastructures (src/port/tar.c) + * + * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/pgtar.h + * + *------------------------------------------------------------------------- + */ +extern void tarCreateHeader(char *h, const char *filename, const char *linktarget, + pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime, + bool bogus); +extern uint64 read_tar_number(const char *s, int len); +extern int tarChecksum(char *header); diff --git a/src/port/Makefile b/src/port/Makefile index 67677ae8937..b7a87643ff4 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -32,7 +32,7 @@ LIBS += $(PTHREAD_LIBS) OBJS = $(LIBOBJS) chklocale.o dirmod.o erand48.o exec.o fls.o inet_net_ntop.o \ noblock.o path.o pgcheckdir.o pg_crc.o pgmkdirp.o pgsleep.o \ - pgstrcasecmp.o qsort.o qsort_arg.o sprompt.o thread.o + pgstrcasecmp.o qsort.o qsort_arg.o sprompt.o tar.o thread.o # foo_srv.o and foo.o are both built from foo.c, but only foo.o has -DFRONTEND OBJS_SRV = $(OBJS:%.o=%_srv.o) diff --git a/src/port/tar.c b/src/port/tar.c new file mode 100644 index 00000000000..8c2087b7fd5 --- /dev/null +++ b/src/port/tar.c @@ -0,0 +1,209 @@ +#include "c.h" +#include "pgtar.h" +#include <sys/stat.h> + +/* + * Print a numeric field in a tar header. The field starts at *s and is of + * length len; val is the value to be written. 
+ * + * Per POSIX, the way to write a number is in octal with leading zeroes and + * one trailing space (or NUL, but we use space) at the end of the specified + * field width. + * + * However, the given value may not fit in the available space in octal form. + * If that's true, we use the GNU extension of writing \200 followed by the + * number in base-256 form (ie, stored in binary MSB-first). (Note: here we + * support only non-negative numbers, so we don't worry about the GNU rules + * for handling negative numbers.) + */ +static void +print_tar_number(char *s, int len, uint64 val) +{ + if (val < (((uint64) 1) << ((len - 1) * 3))) + { + /* Use octal with trailing space */ + s[--len] = ' '; + while (len) + { + s[--len] = (val & 7) + '0'; + val >>= 3; + } + } + else + { + /* Use base-256 with leading \200 */ + s[0] = '\200'; + while (len > 1) + { + s[--len] = (val & 255); + val >>= 8; + } + } +} + + +/* + * Read a numeric field in a tar header. The field starts at *s and is of + * length len. + * + * The POSIX-approved format for a number is octal, ending with a space or + * NUL. However, for values that don't fit, we recognize the GNU extension + * of \200 followed by the number in base-256 form (ie, stored in binary + * MSB-first). (Note: here we support only non-negative numbers, so we don't + * worry about the GNU rules for handling negative numbers.) + */ +uint64 +read_tar_number(const char *s, int len) +{ + uint64 result = 0; + + if (*s == '\200') + { + /* base-256 */ + while (--len) + { + result <<= 8; + result |= (unsigned char) (*++s); + } + } + else + { + /* octal */ + while (len-- && *s >= '0' && *s <= '7') + { + result <<= 3; + result |= (*s - '0'); + s++; + } + } + return result; +} + + +/* + * Calculate the tar checksum for a header. The header is assumed to always + * be 512 bytes, per the tar standard. 
+ */ +int +tarChecksum(char *header) +{ + int i, + sum; + + /* + * Per POSIX, the checksum is the simple sum of all bytes in the header, + * treating the bytes as unsigned, and treating the checksum field (at + * offset 148) as though it contained 8 spaces. + */ + sum = 8 * ' '; /* presumed value for checksum field */ + for (i = 0; i < 512; i++) + if (i < 148 || i >= 156) + sum += 0xFF & header[i]; + return sum; +} + + +/* + * Fill in the buffer pointed to by h with a tar format header. This buffer + * must always have space for 512 characters, which is a requirement of + * the tar format. + * + * "bogus" says to write the incorrect format marker that was emitted by + * pre-9.3 pg_dump. This is to avoid breaking compatibility unnecessarily + * with old copies of pg_restore. + */ +void +tarCreateHeader(char *h, const char *filename, const char *linktarget, + pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime, + bool bogus) +{ + memset(h, 0, 512); /* assume tar header size */ + + /* Name 100 */ + strlcpy(&h[0], filename, 100); + if (linktarget != NULL || S_ISDIR(mode)) + { + /* + * We only support symbolic links to directories, and this is + * indicated in the tar format by adding a slash at the end of the + * name, the same as for regular directories. 
+ */ + int flen = strlen(filename); + + flen = Min(flen, 99); + h[flen] = '/'; + h[flen + 1] = '\0'; + } + + /* Mode 8 - this doesn't include the file type bits (S_IFMT) */ + print_tar_number(&h[100], 8, (mode & 07777)); + + /* User ID 8 */ + print_tar_number(&h[108], 8, uid); + + /* Group 8 */ + print_tar_number(&h[116], 8, gid); + + /* File size 12 */ + if (linktarget != NULL || S_ISDIR(mode)) + /* Symbolic link or directory has size zero */ + print_tar_number(&h[124], 12, 0); + else + print_tar_number(&h[124], 12, size); + + /* Mod Time 12 */ + print_tar_number(&h[136], 12, mtime); + + /* Checksum 8 cannot be calculated until we've filled all other fields */ + + if (linktarget != NULL) + { + /* Type - Symbolic link */ + h[156] = '2'; + /* Link Name 100 */ + strlcpy(&h[157], linktarget, 100); + } + else if (S_ISDIR(mode)) + { + /* Type - directory */ + h[156] = '5'; + } + else + { + /* Type - regular file */ + h[156] = '0'; + } + + if (bogus) + { + /* somebody's incorrect interpretation of Magic 6 + Version 2 */ + sprintf(&h[257], "ustar00"); + } + else + { + /* Magic 6 */ + strcpy(&h[257], "ustar"); + + /* Version 2 */ + memcpy(&h[263], "00", 2); + } + + /* User 32 */ + /* XXX: Do we need to care about setting correct username? */ + strlcpy(&h[265], "postgres", 32); + + /* Group 32 */ + /* XXX: Do we need to care about setting correct group name? */ + strlcpy(&h[297], "postgres", 32); + + /* Major Dev 8 */ + print_tar_number(&h[329], 8, 0); + + /* Minor Dev 8 */ + print_tar_number(&h[337], 8, 0); + + /* Prefix 155 - not used, leave as nulls */ + + /* Finally, compute and insert the checksum */ + print_tar_number(&h[148], 8, tarChecksum(h)); +}