diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml index 6cb06d4910c..b942cb238b1 100644 --- a/doc/src/sgml/ref/pg_restore.sgml +++ b/doc/src/sgml/ref/pg_restore.sgml @@ -246,12 +246,14 @@ PostgreSQL documentation - Run the most time-consuming parts - of pg_restore — those which load data, - create indexes, or create constraints — using multiple - concurrent jobs. This option can dramatically reduce the time + Run the most time-consuming steps + of pg_restore — those that load data, + create indexes, or create constraints — concurrently, using up + to number-of-jobs + concurrent sessions. This option can dramatically reduce the time to restore a large database to a server running on a - multiprocessor machine. + multiprocessor machine. This option is ignored when emitting a script + rather than connecting directly to a database server. @@ -274,8 +276,7 @@ PostgreSQL documentation Only the custom and directory archive formats are supported with this option. The input must be a regular file or directory (not, for example, a - pipe). This option is ignored when emitting a script rather - than connecting directly to a database server. Also, multiple + pipe or standard input). Also, multiple jobs cannot be used together with the option . diff --git a/src/bin/pg_dump/pg_backup_custom.c b/src/bin/pg_dump/pg_backup_custom.c index 3a9881d6010..971e6adf487 100644 --- a/src/bin/pg_dump/pg_backup_custom.c +++ b/src/bin/pg_dump/pg_backup_custom.c @@ -70,6 +70,8 @@ typedef struct { CompressorState *cs; int hasSeek; + /* lastFilePos is used only when reading, and may be invalid if !hasSeek */ + pgoff_t lastFilePos; /* position after last data block we've read */ } lclContext; typedef struct @@ -181,8 +183,13 @@ InitArchiveFmt_Custom(ArchiveHandle *AH) ReadHead(AH); ReadToc(AH); - } + /* + * Remember location of first data block (i.e., the point after TOC) + * in case we have to search for desired data blocks. + */ + ctx->lastFilePos = _getFilePos(AH, ctx); + } } /* @@ -418,13 +425,62 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te) { /* * We cannot seek directly to the desired block. Instead, skip over - * block headers until we find the one we want. This could fail if we - * are asked to restore items out-of-order. + * block headers until we find the one we want. Remember the + * positions of skipped-over blocks, so that if we later decide we + * need to read one, we'll be able to seek to it. + * + * When our input file is seekable, we can do the search starting from + * the point after the last data block we scanned in previous + * iterations of this function. */ - _readBlockHeader(AH, &blkType, &id); - - while (blkType != EOF && id != te->dumpId) + if (ctx->hasSeek) { + if (fseeko(AH->FH, ctx->lastFilePos, SEEK_SET) != 0) + fatal("error during file seek: %m"); + } + + for (;;) + { + pgoff_t thisBlkPos = _getFilePos(AH, ctx); + + _readBlockHeader(AH, &blkType, &id); + + if (blkType == EOF || id == te->dumpId) + break; + + /* Remember the block position, if we got one */ + if (thisBlkPos >= 0) + { + TocEntry *otherte = getTocEntryByDumpId(AH, id); + + if (otherte && otherte->formatData) + { + lclTocEntry *othertctx = (lclTocEntry *) otherte->formatData; + + /* + * Note: on Windows, multiple threads might access/update + * the same lclTocEntry concurrently, but that should be + * safe as long as we update dataPos before dataState. + * Ideally, we'd use pg_write_barrier() to enforce that, + * but the needed infrastructure doesn't exist in frontend + * code. But Windows only runs on machines with strong + * store ordering, so it should be okay for now. + */ + if (othertctx->dataState == K_OFFSET_POS_NOT_SET) + { + othertctx->dataPos = thisBlkPos; + othertctx->dataState = K_OFFSET_POS_SET; + } + else if (othertctx->dataPos != thisBlkPos || + othertctx->dataState != K_OFFSET_POS_SET) + { + /* sanity check */ + pg_log_warning("data block %d has wrong seek position", + id); + } + } + } + switch (blkType) { case BLK_DATA: @@ -440,7 +496,6 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te) blkType); break; } - _readBlockHeader(AH, &blkType, &id); } } else @@ -452,20 +507,18 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te) _readBlockHeader(AH, &blkType, &id); } - /* Produce suitable failure message if we fell off end of file */ + /* + * If we reached EOF without finding the block we want, then either it + * doesn't exist, or it does but we lack the ability to seek back to it. + */ if (blkType == EOF) { - if (tctx->dataState == K_OFFSET_POS_NOT_SET) - fatal("could not find block ID %d in archive -- " - "possibly due to out-of-order restore request, " - "which cannot be handled due to lack of data offsets in archive", - te->dumpId); - else if (!ctx->hasSeek) + if (!ctx->hasSeek) fatal("could not find block ID %d in archive -- " "possibly due to out-of-order restore request, " "which cannot be handled due to non-seekable input file", te->dumpId); - else /* huh, the dataPos led us to EOF? */ + else fatal("could not find block ID %d in archive -- " "possibly corrupt archive", te->dumpId); @@ -491,6 +544,20 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te) blkType); break; } + + /* + * If our input file is seekable but lacks data offsets, update our + * knowledge of where to start future searches from. (Note that we did + * not update the current TE's dataState/dataPos. We could have, but + * there is no point since it will not be visited again.) + */ + if (ctx->hasSeek && tctx->dataState == K_OFFSET_POS_NOT_SET) + { + pgoff_t curPos = _getFilePos(AH, ctx); + + if (curPos > ctx->lastFilePos) + ctx->lastFilePos = curPos; + } } /* @@ -548,6 +615,7 @@ _skipBlobs(ArchiveHandle *AH) static void _skipData(ArchiveHandle *AH) { + lclContext *ctx = (lclContext *) AH->formatData; size_t blkLen; char *buf = NULL; int buflen = 0; @@ -556,19 +624,27 @@ _skipData(ArchiveHandle *AH) blkLen = ReadInt(AH); while (blkLen != 0) { - if (blkLen > buflen) + if (ctx->hasSeek) { - if (buf) - free(buf); - buf = (char *) pg_malloc(blkLen); - buflen = blkLen; + if (fseeko(AH->FH, blkLen, SEEK_CUR) != 0) + fatal("error during file seek: %m"); } - if ((cnt = fread(buf, 1, blkLen, AH->FH)) != blkLen) + else { - if (feof(AH->FH)) - fatal("could not read from input file: end of file"); - else - fatal("could not read from input file: %m"); + if (blkLen > buflen) + { + if (buf) + free(buf); + buf = (char *) pg_malloc(blkLen); + buflen = blkLen; + } + if ((cnt = fread(buf, 1, blkLen, AH->FH)) != blkLen) + { + if (feof(AH->FH)) + fatal("could not read from input file: end of file"); + else + fatal("could not read from input file: %m"); + } } blkLen = ReadInt(AH); @@ -804,6 +880,9 @@ _Clone(ArchiveHandle *AH) { lclContext *ctx = (lclContext *) AH->formatData; + /* + * Each thread must have private lclContext working state. + */ AH->formatData = (lclContext *) pg_malloc(sizeof(lclContext)); memcpy(AH->formatData, ctx, sizeof(lclContext)); ctx = (lclContext *) AH->formatData; @@ -813,10 +892,13 @@ _Clone(ArchiveHandle *AH) fatal("compressor active"); /* + * We intentionally do not clone TOC-entry-local state: it's useful to + * share knowledge about where the data blocks are across threads. + * _PrintTocData has to be careful about the order of operations on that + * state, though. + * * Note: we do not make a local lo_buf because we expect at most one BLOBS - * entry per archive, so no parallelism is possible. Likewise, - * TOC-entry-local state isn't an issue because any one TOC entry is - * touched by just one worker child. + * entry per archive, so no parallelism is possible. */ }