mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Eliminate O(N^2) behavior in parallel restore with many blobs.
With hundreds of thousands of TOC entries, the repeated searches in reduce_dependencies() become the dominant cost. Get rid of that searching by constructing reverse-dependency lists, which we can do in O(N) time during the fix_dependencies() preprocessing. I chose to store the reverse dependencies as DumpId arrays for consistency with the forward-dependency representation, and keep the previously-transient tocsByDumpId[] array around to locate actual TOC entry structs quickly from dump IDs. While this fixes the slow case reported by Vlad Arkhipov, there is still a potential for O(N^2) behavior with sufficiently many tables: fix_dependencies itself, as well as mark_create_done and inhibit_data_for_failed_table, are doing repeated searches to deal with table-to-table-data dependencies. Possibly this work could be extended to deal with that, although the latter two functions are also used in non-parallel restore where we currently don't run fix_dependencies. Another TODO is that we fail to parallelize restore of multiple blobs at all. This appears to require changes in the archive format to fix. Back-patch to 9.0 where the problem was reported. 8.4 has potential issues as well; but since it doesn't create a separate TOC entry for each blob, it's at much less risk of having enough TOC entries to cause real problems.
This commit is contained in:
		@@ -79,6 +79,10 @@ const char *progname;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
static const char *modulename = gettext_noop("archiver");
 | 
					static const char *modulename = gettext_noop("archiver");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* index array created by fix_dependencies -- only used in parallel restore */
 | 
				
			||||||
 | 
					static TocEntry	  **tocsByDumpId;			/* index by dumpId - 1 */
 | 
				
			||||||
 | 
					static DumpId		maxDumpId;				/* length of above array */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
 | 
					static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
 | 
				
			||||||
		 const int compression, ArchiveMode mode);
 | 
							 const int compression, ArchiveMode mode);
 | 
				
			||||||
@@ -134,9 +138,7 @@ static void fix_dependencies(ArchiveHandle *AH);
 | 
				
			|||||||
static bool has_lock_conflicts(TocEntry *te1, TocEntry *te2);
 | 
					static bool has_lock_conflicts(TocEntry *te1, TocEntry *te2);
 | 
				
			||||||
static void repoint_table_dependencies(ArchiveHandle *AH,
 | 
					static void repoint_table_dependencies(ArchiveHandle *AH,
 | 
				
			||||||
						   DumpId tableId, DumpId tableDataId);
 | 
											   DumpId tableId, DumpId tableDataId);
 | 
				
			||||||
static void identify_locking_dependencies(TocEntry *te,
 | 
					static void identify_locking_dependencies(TocEntry *te);
 | 
				
			||||||
							  TocEntry **tocsByDumpId,
 | 
					 | 
				
			||||||
							  DumpId maxDumpId);
 | 
					 | 
				
			||||||
static void reduce_dependencies(ArchiveHandle *AH, TocEntry *te,
 | 
					static void reduce_dependencies(ArchiveHandle *AH, TocEntry *te,
 | 
				
			||||||
					TocEntry *ready_list);
 | 
										TocEntry *ready_list);
 | 
				
			||||||
static void mark_create_done(ArchiveHandle *AH, TocEntry *te);
 | 
					static void mark_create_done(ArchiveHandle *AH, TocEntry *te);
 | 
				
			||||||
@@ -3743,7 +3745,12 @@ mark_work_done(ArchiveHandle *AH, TocEntry *ready_list,
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Process the dependency information into a form useful for parallel restore.
 | 
					 * Process the dependency information into a form useful for parallel restore.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * We set up depCount fields that are the number of as-yet-unprocessed
 | 
					 * This function takes care of fixing up some missing or badly designed
 | 
				
			||||||
 | 
					 * dependencies, and then prepares subsidiary data structures that will be
 | 
				
			||||||
 | 
					 * used in the main parallel-restore logic, including:
 | 
				
			||||||
 | 
					 * 1. We build the tocsByDumpId[] index array.
 | 
				
			||||||
 | 
					 * 2. We build the revDeps[] arrays of incoming dependency dumpIds.
 | 
				
			||||||
 | 
					 * 3. We set up depCount fields that are the number of as-yet-unprocessed
 | 
				
			||||||
 * dependencies for each TOC entry.
 | 
					 * dependencies for each TOC entry.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * We also identify locking dependencies so that we can avoid trying to
 | 
					 * We also identify locking dependencies so that we can avoid trying to
 | 
				
			||||||
@@ -3752,22 +3759,20 @@ mark_work_done(ArchiveHandle *AH, TocEntry *ready_list,
 | 
				
			|||||||
static void
 | 
					static void
 | 
				
			||||||
fix_dependencies(ArchiveHandle *AH)
 | 
					fix_dependencies(ArchiveHandle *AH)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	TocEntry  **tocsByDumpId;
 | 
					 | 
				
			||||||
	TocEntry   *te;
 | 
						TocEntry   *te;
 | 
				
			||||||
	DumpId		maxDumpId;
 | 
					 | 
				
			||||||
	int			i;
 | 
						int			i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * For some of the steps here, it is convenient to have an array that
 | 
						 * It is convenient to have an array that indexes the TOC entries by dump
 | 
				
			||||||
	 * indexes the TOC entries by dump ID, rather than searching the TOC list
 | 
						 * ID, rather than searching the TOC list repeatedly.  Entries for dump
 | 
				
			||||||
	 * repeatedly.	Entries for dump IDs not present in the TOC will be NULL.
 | 
						 * IDs not present in the TOC will be NULL.
 | 
				
			||||||
	 *
 | 
						 *
 | 
				
			||||||
	 * NOTE: because maxDumpId is just the highest dump ID defined in the
 | 
						 * NOTE: because maxDumpId is just the highest dump ID defined in the
 | 
				
			||||||
	 * archive, there might be dependencies for IDs > maxDumpId.  All uses of
 | 
						 * archive, there might be dependencies for IDs > maxDumpId.  All uses of
 | 
				
			||||||
	 * this array must guard against out-of-range dependency numbers.
 | 
						 * this array must guard against out-of-range dependency numbers.
 | 
				
			||||||
	 *
 | 
						 *
 | 
				
			||||||
	 * Also, initialize the depCount fields, and make sure all the TOC items
 | 
						 * Also, initialize the depCount/revDeps/nRevDeps fields, and make sure
 | 
				
			||||||
	 * are marked as not being in any parallel-processing list.
 | 
						 * the TOC items are marked as not being in any parallel-processing list.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	maxDumpId = AH->maxDumpId;
 | 
						maxDumpId = AH->maxDumpId;
 | 
				
			||||||
	tocsByDumpId = (TocEntry **) calloc(maxDumpId, sizeof(TocEntry *));
 | 
						tocsByDumpId = (TocEntry **) calloc(maxDumpId, sizeof(TocEntry *));
 | 
				
			||||||
@@ -3775,6 +3780,8 @@ fix_dependencies(ArchiveHandle *AH)
 | 
				
			|||||||
	{
 | 
						{
 | 
				
			||||||
		tocsByDumpId[te->dumpId - 1] = te;
 | 
							tocsByDumpId[te->dumpId - 1] = te;
 | 
				
			||||||
		te->depCount = te->nDeps;
 | 
							te->depCount = te->nDeps;
 | 
				
			||||||
 | 
							te->revDeps = NULL;
 | 
				
			||||||
 | 
							te->nRevDeps = 0;
 | 
				
			||||||
		te->par_prev = NULL;
 | 
							te->par_prev = NULL;
 | 
				
			||||||
		te->par_next = NULL;
 | 
							te->par_next = NULL;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@@ -3792,6 +3799,9 @@ fix_dependencies(ArchiveHandle *AH)
 | 
				
			|||||||
	 * TABLE, if possible.	However, if the dependency isn't in the archive
 | 
						 * TABLE, if possible.	However, if the dependency isn't in the archive
 | 
				
			||||||
	 * then just assume it was a TABLE; this is to cover cases where the table
 | 
						 * then just assume it was a TABLE; this is to cover cases where the table
 | 
				
			||||||
	 * was suppressed but we have the data and some dependent post-data items.
 | 
						 * was suppressed but we have the data and some dependent post-data items.
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * XXX this is O(N^2) if there are a lot of tables.  We ought to fix
 | 
				
			||||||
 | 
						 * pg_dump to produce correctly-linked dependencies in the first place.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	for (te = AH->toc->next; te != AH->toc; te = te->next)
 | 
						for (te = AH->toc->next; te != AH->toc; te = te->next)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
@@ -3838,8 +3848,14 @@ fix_dependencies(ArchiveHandle *AH)
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * It is possible that the dependencies list items that are not in the
 | 
						 * At this point we start to build the revDeps reverse-dependency arrays,
 | 
				
			||||||
	 * archive at all.	Subtract such items from the depCounts.
 | 
						 * so all changes of dependencies must be complete.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Count the incoming dependencies for each item.  Also, it is possible
 | 
				
			||||||
 | 
						 * that the dependencies list items that are not in the archive at
 | 
				
			||||||
 | 
						 * all.  Subtract such items from the depCounts.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	for (te = AH->toc->next; te != AH->toc; te = te->next)
 | 
						for (te = AH->toc->next; te != AH->toc; te = te->next)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
@@ -3847,11 +3863,43 @@ fix_dependencies(ArchiveHandle *AH)
 | 
				
			|||||||
		{
 | 
							{
 | 
				
			||||||
			DumpId		depid = te->dependencies[i];
 | 
								DumpId		depid = te->dependencies[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if (depid > maxDumpId || tocsByDumpId[depid - 1] == NULL)
 | 
								if (depid <= maxDumpId && tocsByDumpId[depid - 1] != NULL)
 | 
				
			||||||
 | 
									tocsByDumpId[depid - 1]->nRevDeps++;
 | 
				
			||||||
 | 
								else
 | 
				
			||||||
				te->depCount--;
 | 
									te->depCount--;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Allocate space for revDeps[] arrays, and reset nRevDeps so we can
 | 
				
			||||||
 | 
						 * use it as a counter below.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						for (te = AH->toc->next; te != AH->toc; te = te->next)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							if (te->nRevDeps > 0)
 | 
				
			||||||
 | 
								te->revDeps = (DumpId *) malloc(te->nRevDeps * sizeof(DumpId));
 | 
				
			||||||
 | 
							te->nRevDeps = 0;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Build the revDeps[] arrays of incoming-dependency dumpIds.  This
 | 
				
			||||||
 | 
						 * had better agree with the loops above.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						for (te = AH->toc->next; te != AH->toc; te = te->next)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							for (i = 0; i < te->nDeps; i++)
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								DumpId		depid = te->dependencies[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if (depid <= maxDumpId && tocsByDumpId[depid - 1] != NULL)
 | 
				
			||||||
 | 
								{
 | 
				
			||||||
 | 
									TocEntry   *otherte = tocsByDumpId[depid - 1];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									otherte->revDeps[otherte->nRevDeps++] = te->dumpId;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Lastly, work out the locking dependencies.
 | 
						 * Lastly, work out the locking dependencies.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
@@ -3859,10 +3907,8 @@ fix_dependencies(ArchiveHandle *AH)
 | 
				
			|||||||
	{
 | 
						{
 | 
				
			||||||
		te->lockDeps = NULL;
 | 
							te->lockDeps = NULL;
 | 
				
			||||||
		te->nLockDeps = 0;
 | 
							te->nLockDeps = 0;
 | 
				
			||||||
		identify_locking_dependencies(te, tocsByDumpId, maxDumpId);
 | 
							identify_locking_dependencies(te);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					 | 
				
			||||||
	free(tocsByDumpId);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
@@ -3896,13 +3942,9 @@ repoint_table_dependencies(ArchiveHandle *AH,
 | 
				
			|||||||
 * Identify which objects we'll need exclusive lock on in order to restore
 | 
					 * Identify which objects we'll need exclusive lock on in order to restore
 | 
				
			||||||
 * the given TOC entry (*other* than the one identified by the TOC entry
 | 
					 * the given TOC entry (*other* than the one identified by the TOC entry
 | 
				
			||||||
 * itself).  Record their dump IDs in the entry's lockDeps[] array.
 | 
					 * itself).  Record their dump IDs in the entry's lockDeps[] array.
 | 
				
			||||||
 * tocsByDumpId[] is a convenience array (of size maxDumpId) to avoid
 | 
					 | 
				
			||||||
 * searching the TOC for each dependency.
 | 
					 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
identify_locking_dependencies(TocEntry *te,
 | 
					identify_locking_dependencies(TocEntry *te)
 | 
				
			||||||
							  TocEntry **tocsByDumpId,
 | 
					 | 
				
			||||||
							  DumpId maxDumpId)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	DumpId	   *lockids;
 | 
						DumpId	   *lockids;
 | 
				
			||||||
	int			nlockids;
 | 
						int			nlockids;
 | 
				
			||||||
@@ -3956,31 +3998,21 @@ identify_locking_dependencies(TocEntry *te,
 | 
				
			|||||||
static void
 | 
					static void
 | 
				
			||||||
reduce_dependencies(ArchiveHandle *AH, TocEntry *te, TocEntry *ready_list)
 | 
					reduce_dependencies(ArchiveHandle *AH, TocEntry *te, TocEntry *ready_list)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	DumpId		target = te->dumpId;
 | 
					 | 
				
			||||||
	int			i;
 | 
						int			i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ahlog(AH, 2, "reducing dependencies for %d\n", target);
 | 
						ahlog(AH, 2, "reducing dependencies for %d\n", te->dumpId);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						for (i = 0; i < te->nRevDeps; i++)
 | 
				
			||||||
	 * We must examine all entries, not only the ones after the target item,
 | 
					 | 
				
			||||||
	 * because if the user used a -L switch then the original dependency-
 | 
					 | 
				
			||||||
	 * respecting order has been destroyed by SortTocFromFile.
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	for (te = AH->toc->next; te != AH->toc; te = te->next)
 | 
					 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		for (i = 0; i < te->nDeps; i++)
 | 
							TocEntry   *otherte = tocsByDumpId[te->revDeps[i] - 1];
 | 
				
			||||||
		{
 | 
					
 | 
				
			||||||
			if (te->dependencies[i] == target)
 | 
							otherte->depCount--;
 | 
				
			||||||
			{
 | 
							if (otherte->depCount == 0 && otherte->par_prev != NULL)
 | 
				
			||||||
				te->depCount--;
 | 
					 | 
				
			||||||
				if (te->depCount == 0 && te->par_prev != NULL)
 | 
					 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
			/* It must be in the pending list, so remove it ... */
 | 
								/* It must be in the pending list, so remove it ... */
 | 
				
			||||||
					par_list_remove(te);
 | 
								par_list_remove(otherte);
 | 
				
			||||||
			/* ... and add to ready_list */
 | 
								/* ... and add to ready_list */
 | 
				
			||||||
					par_list_append(ready_list, te);
 | 
								par_list_append(ready_list, otherte);
 | 
				
			||||||
				}
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -326,6 +326,8 @@ typedef struct _tocEntry
 | 
				
			|||||||
	struct _tocEntry *par_next; /* these are NULL if not in either list */
 | 
						struct _tocEntry *par_next; /* these are NULL if not in either list */
 | 
				
			||||||
	bool		created;		/* set for DATA member if TABLE was created */
 | 
						bool		created;		/* set for DATA member if TABLE was created */
 | 
				
			||||||
	int			depCount;		/* number of dependencies not yet restored */
 | 
						int			depCount;		/* number of dependencies not yet restored */
 | 
				
			||||||
 | 
						DumpId	   *revDeps;		/* dumpIds of objects depending on this one */
 | 
				
			||||||
 | 
						int			nRevDeps;		/* number of such dependencies */
 | 
				
			||||||
	DumpId	   *lockDeps;		/* dumpIds of objects this one needs lock on */
 | 
						DumpId	   *lockDeps;		/* dumpIds of objects this one needs lock on */
 | 
				
			||||||
	int			nLockDeps;		/* number of such dependencies */
 | 
						int			nLockDeps;		/* number of such dependencies */
 | 
				
			||||||
} TocEntry;
 | 
					} TocEntry;
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user