diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml
index 6cb06d4910c..b942cb238b1 100644
--- a/doc/src/sgml/ref/pg_restore.sgml
+++ b/doc/src/sgml/ref/pg_restore.sgml
@@ -246,12 +246,14 @@ PostgreSQL documentation
- Run the most time-consuming parts
- of pg_restore — those which load data,
- create indexes, or create constraints — using multiple
- concurrent jobs. This option can dramatically reduce the time
+ Run the most time-consuming steps
+ of pg_restore — those that load data,
+ create indexes, or create constraints — concurrently, using up
+ to number-of-jobs
+ concurrent sessions. This option can dramatically reduce the time
to restore a large database to a server running on a
- multiprocessor machine.
+ multiprocessor machine. This option is ignored when emitting a script
+ rather than connecting directly to a database server.
@@ -274,8 +276,7 @@ PostgreSQL documentation
Only the custom and directory archive formats are supported
with this option.
The input must be a regular file or directory (not, for example, a
- pipe). This option is ignored when emitting a script rather
- than connecting directly to a database server. Also, multiple
+ pipe or standard input). Also, multiple
jobs cannot be used together with the
option .
diff --git a/src/bin/pg_dump/pg_backup_custom.c b/src/bin/pg_dump/pg_backup_custom.c
index 3a9881d6010..971e6adf487 100644
--- a/src/bin/pg_dump/pg_backup_custom.c
+++ b/src/bin/pg_dump/pg_backup_custom.c
@@ -70,6 +70,8 @@ typedef struct
{
CompressorState *cs;
int hasSeek;
+ /* lastFilePos is used only when reading, and may be invalid if !hasSeek */
+ pgoff_t lastFilePos; /* position after last data block we've read */
} lclContext;
typedef struct
@@ -181,8 +183,13 @@ InitArchiveFmt_Custom(ArchiveHandle *AH)
ReadHead(AH);
ReadToc(AH);
- }
+ /*
+ * Remember location of first data block (i.e., the point after TOC)
+ * in case we have to search for desired data blocks.
+ */
+ ctx->lastFilePos = _getFilePos(AH, ctx);
+ }
}
/*
@@ -418,13 +425,62 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
{
/*
* We cannot seek directly to the desired block. Instead, skip over
- * block headers until we find the one we want. This could fail if we
- * are asked to restore items out-of-order.
+ * block headers until we find the one we want. Remember the
+ * positions of skipped-over blocks, so that if we later decide we
+ * need to read one, we'll be able to seek to it.
+ *
+ * When our input file is seekable, we can do the search starting from
+ * the point after the last data block we scanned in previous
+ * iterations of this function.
*/
- _readBlockHeader(AH, &blkType, &id);
-
- while (blkType != EOF && id != te->dumpId)
+ if (ctx->hasSeek)
{
+ if (fseeko(AH->FH, ctx->lastFilePos, SEEK_SET) != 0)
+ fatal("error during file seek: %m");
+ }
+
+ for (;;)
+ {
+ pgoff_t thisBlkPos = _getFilePos(AH, ctx);
+
+ _readBlockHeader(AH, &blkType, &id);
+
+ if (blkType == EOF || id == te->dumpId)
+ break;
+
+ /* Remember the block position, if we got one */
+ if (thisBlkPos >= 0)
+ {
+ TocEntry *otherte = getTocEntryByDumpId(AH, id);
+
+ if (otherte && otherte->formatData)
+ {
+ lclTocEntry *othertctx = (lclTocEntry *) otherte->formatData;
+
+ /*
+ * Note: on Windows, multiple threads might access/update
+ * the same lclTocEntry concurrently, but that should be
+ * safe as long as we update dataPos before dataState.
+ * Ideally, we'd use pg_write_barrier() to enforce that,
+ * but the needed infrastructure doesn't exist in frontend
+ * code. But Windows only runs on machines with strong
+ * store ordering, so it should be okay for now.
+ */
+ if (othertctx->dataState == K_OFFSET_POS_NOT_SET)
+ {
+ othertctx->dataPos = thisBlkPos;
+ othertctx->dataState = K_OFFSET_POS_SET;
+ }
+ else if (othertctx->dataPos != thisBlkPos ||
+ othertctx->dataState != K_OFFSET_POS_SET)
+ {
+ /* sanity check */
+ pg_log_warning("data block %d has wrong seek position",
+ id);
+ }
+ }
+ }
+
switch (blkType)
{
case BLK_DATA:
@@ -440,7 +496,6 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
blkType);
break;
}
- _readBlockHeader(AH, &blkType, &id);
}
}
else
@@ -452,20 +507,18 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
_readBlockHeader(AH, &blkType, &id);
}
- /* Produce suitable failure message if we fell off end of file */
+ /*
+ * If we reached EOF without finding the block we want, then either it
+ * doesn't exist, or it does but we lack the ability to seek back to it.
+ */
if (blkType == EOF)
{
- if (tctx->dataState == K_OFFSET_POS_NOT_SET)
- fatal("could not find block ID %d in archive -- "
- "possibly due to out-of-order restore request, "
- "which cannot be handled due to lack of data offsets in archive",
- te->dumpId);
- else if (!ctx->hasSeek)
+ if (!ctx->hasSeek)
fatal("could not find block ID %d in archive -- "
"possibly due to out-of-order restore request, "
"which cannot be handled due to non-seekable input file",
te->dumpId);
- else /* huh, the dataPos led us to EOF? */
+ else
fatal("could not find block ID %d in archive -- "
"possibly corrupt archive",
te->dumpId);
@@ -491,6 +544,20 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
blkType);
break;
}
+
+ /*
+ * If our input file is seekable but lacks data offsets, update our
+ * knowledge of where to start future searches from. (Note that we did
+ * not update the current TE's dataState/dataPos. We could have, but
+ * there is no point since it will not be visited again.)
+ */
+ if (ctx->hasSeek && tctx->dataState == K_OFFSET_POS_NOT_SET)
+ {
+ pgoff_t curPos = _getFilePos(AH, ctx);
+
+ if (curPos > ctx->lastFilePos)
+ ctx->lastFilePos = curPos;
+ }
}
/*
@@ -548,6 +615,7 @@ _skipBlobs(ArchiveHandle *AH)
static void
_skipData(ArchiveHandle *AH)
{
+ lclContext *ctx = (lclContext *) AH->formatData;
size_t blkLen;
char *buf = NULL;
int buflen = 0;
@@ -556,19 +624,27 @@ _skipData(ArchiveHandle *AH)
blkLen = ReadInt(AH);
while (blkLen != 0)
{
- if (blkLen > buflen)
+ if (ctx->hasSeek)
{
- if (buf)
- free(buf);
- buf = (char *) pg_malloc(blkLen);
- buflen = blkLen;
+ if (fseeko(AH->FH, blkLen, SEEK_CUR) != 0)
+ fatal("error during file seek: %m");
}
- if ((cnt = fread(buf, 1, blkLen, AH->FH)) != blkLen)
+ else
{
- if (feof(AH->FH))
- fatal("could not read from input file: end of file");
- else
- fatal("could not read from input file: %m");
+ if (blkLen > buflen)
+ {
+ if (buf)
+ free(buf);
+ buf = (char *) pg_malloc(blkLen);
+ buflen = blkLen;
+ }
+ if ((cnt = fread(buf, 1, blkLen, AH->FH)) != blkLen)
+ {
+ if (feof(AH->FH))
+ fatal("could not read from input file: end of file");
+ else
+ fatal("could not read from input file: %m");
+ }
}
blkLen = ReadInt(AH);
@@ -804,6 +880,9 @@ _Clone(ArchiveHandle *AH)
{
lclContext *ctx = (lclContext *) AH->formatData;
+ /*
+ * Each thread must have private lclContext working state.
+ */
AH->formatData = (lclContext *) pg_malloc(sizeof(lclContext));
memcpy(AH->formatData, ctx, sizeof(lclContext));
ctx = (lclContext *) AH->formatData;
@@ -813,10 +892,13 @@ _Clone(ArchiveHandle *AH)
fatal("compressor active");
/*
+ * We intentionally do not clone TOC-entry-local state: it's useful to
+ * share knowledge about where the data blocks are across threads.
+ * _PrintTocData has to be careful about the order of operations on that
+ * state, though.
+ *
* Note: we do not make a local lo_buf because we expect at most one BLOBS
- * entry per archive, so no parallelism is possible. Likewise,
- * TOC-entry-local state isn't an issue because any one TOC entry is
- * touched by just one worker child.
+ * entry per archive, so no parallelism is possible.
*/
}