mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Don't lock tables in RelationGetPartitionDispatchInfo.
Instead, lock them in the caller using find_all_inheritors so that they get locked in the standard order, minimizing deadlock risks. Also in RelationGetPartitionDispatchInfo, avoid opening tables which are not partitioned; there's no need.

Amit Langote, reviewed by Ashutosh Bapat and Amit Khandekar

Discussion: http://postgr.es/m/91b36fa1-c197-b72f-ca6e-56c593bae68c@lab.ntt.co.jp
This commit is contained in:
		| @@ -1000,12 +1000,16 @@ get_partition_qual_relid(Oid relid) | |||||||
|  * RelationGetPartitionDispatchInfo |  * RelationGetPartitionDispatchInfo | ||||||
|  *		Returns information necessary to route tuples down a partition tree |  *		Returns information necessary to route tuples down a partition tree | ||||||
|  * |  * | ||||||
|  * All the partitions will be locked with lockmode, unless it is NoLock. |  * The number of elements in the returned array (that is, the number of | ||||||
|  * A list of the OIDs of all the leaf partitions of rel is returned in |  * PartitionDispatch objects for the partitioned tables in the partition tree) | ||||||
|  * *leaf_part_oids. |  * is returned in *num_parted and a list of the OIDs of all the leaf | ||||||
|  |  * partitions of rel is returned in *leaf_part_oids. | ||||||
|  |  * | ||||||
|  |  * All the relations in the partition tree (including 'rel') must have been | ||||||
|  |  * locked (using at least the AccessShareLock) by the caller. | ||||||
|  */ |  */ | ||||||
| PartitionDispatch * | PartitionDispatch * | ||||||
| RelationGetPartitionDispatchInfo(Relation rel, int lockmode, | RelationGetPartitionDispatchInfo(Relation rel, | ||||||
| 								 int *num_parted, List **leaf_part_oids) | 								 int *num_parted, List **leaf_part_oids) | ||||||
| { | { | ||||||
| 	PartitionDispatchData **pd; | 	PartitionDispatchData **pd; | ||||||
| @@ -1020,14 +1024,18 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode, | |||||||
| 				offset; | 				offset; | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * Lock partitions and make a list of the partitioned ones to prepare | 	 * We rely on the relcache to traverse the partition tree to build both | ||||||
| 	 * their PartitionDispatch objects below. | 	 * the leaf partition OIDs list and the array of PartitionDispatch objects | ||||||
|  | 	 * for the partitioned tables in the tree.  That means every partitioned | ||||||
|  | 	 * table in the tree must be locked, which is fine since we require the | ||||||
|  | 	 * caller to lock all the partitions anyway. | ||||||
| 	 * | 	 * | ||||||
| 	 * Cannot use find_all_inheritors() here, because then the order of OIDs | 	 * For every partitioned table in the tree, starting with the root | ||||||
| 	 * in parted_rels list would be unknown, which does not help, because we | 	 * partitioned table, add its relcache entry to parted_rels, while also | ||||||
| 	 * assign indexes within individual PartitionDispatch in an order that is | 	 * queuing its partitions (in the order in which they appear in the | ||||||
| 	 * predetermined (determined by the order of OIDs in individual partition | 	 * partition descriptor) to be looked at later in the same loop.  This is | ||||||
| 	 * descriptors). | 	 * a bit tricky but works because the foreach() macro doesn't fetch the | ||||||
|  | 	 * next list element until the bottom of the loop. | ||||||
| 	 */ | 	 */ | ||||||
| 	*num_parted = 1; | 	*num_parted = 1; | ||||||
| 	parted_rels = list_make1(rel); | 	parted_rels = list_make1(rel); | ||||||
| @@ -1036,29 +1044,24 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode, | |||||||
| 	APPEND_REL_PARTITION_OIDS(rel, all_parts, all_parents); | 	APPEND_REL_PARTITION_OIDS(rel, all_parts, all_parents); | ||||||
| 	forboth(lc1, all_parts, lc2, all_parents) | 	forboth(lc1, all_parts, lc2, all_parents) | ||||||
| 	{ | 	{ | ||||||
| 		Relation	partrel = heap_open(lfirst_oid(lc1), lockmode); | 		Oid			partrelid = lfirst_oid(lc1); | ||||||
| 		Relation	parent = lfirst(lc2); | 		Relation	parent = lfirst(lc2); | ||||||
| 		PartitionDesc partdesc = RelationGetPartitionDesc(partrel); |  | ||||||
|  |  | ||||||
| 		/* | 		if (get_rel_relkind(partrelid) == RELKIND_PARTITIONED_TABLE) | ||||||
| 		 * If this partition is a partitioned table, add its children to the |  | ||||||
| 		 * end of the list, so that they are processed as well. |  | ||||||
| 		 */ |  | ||||||
| 		if (partdesc) |  | ||||||
| 		{ | 		{ | ||||||
|  | 			/* | ||||||
|  | 			 * Already locked by the caller.  Note that it is the | ||||||
|  | 			 * responsibility of the caller to close the below relcache entry, | ||||||
|  | 			 * once done using the information being collected here (for | ||||||
|  | 			 * example, in ExecEndModifyTable). | ||||||
|  | 			 */ | ||||||
|  | 			Relation	partrel = heap_open(partrelid, NoLock); | ||||||
|  |  | ||||||
| 			(*num_parted)++; | 			(*num_parted)++; | ||||||
| 			parted_rels = lappend(parted_rels, partrel); | 			parted_rels = lappend(parted_rels, partrel); | ||||||
| 			parted_rel_parents = lappend(parted_rel_parents, parent); | 			parted_rel_parents = lappend(parted_rel_parents, parent); | ||||||
| 			APPEND_REL_PARTITION_OIDS(partrel, all_parts, all_parents); | 			APPEND_REL_PARTITION_OIDS(partrel, all_parts, all_parents); | ||||||
| 		} | 		} | ||||||
| 		else |  | ||||||
| 			heap_close(partrel, NoLock); |  | ||||||
|  |  | ||||||
| 		/* |  | ||||||
| 		 * We keep the partitioned ones open until we're done using the |  | ||||||
| 		 * information being collected here (for example, see |  | ||||||
| 		 * ExecEndModifyTable). |  | ||||||
| 		 */ |  | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
|   | |||||||
| @@ -43,6 +43,7 @@ | |||||||
| #include "access/xact.h" | #include "access/xact.h" | ||||||
| #include "catalog/namespace.h" | #include "catalog/namespace.h" | ||||||
| #include "catalog/partition.h" | #include "catalog/partition.h" | ||||||
|  | #include "catalog/pg_inherits_fn.h" | ||||||
| #include "catalog/pg_publication.h" | #include "catalog/pg_publication.h" | ||||||
| #include "commands/matview.h" | #include "commands/matview.h" | ||||||
| #include "commands/trigger.h" | #include "commands/trigger.h" | ||||||
| @@ -3249,9 +3250,12 @@ ExecSetupPartitionTupleRouting(Relation rel, | |||||||
| 	int			i; | 	int			i; | ||||||
| 	ResultRelInfo *leaf_part_rri; | 	ResultRelInfo *leaf_part_rri; | ||||||
|  |  | ||||||
| 	/* Get the tuple-routing information and lock partitions */ | 	/* | ||||||
| 	*pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock, num_parted, | 	 * Get the information about the partition tree after locking all the | ||||||
| 										   &leaf_parts); | 	 * partitions. | ||||||
|  | 	 */ | ||||||
|  | 	(void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, NULL); | ||||||
|  | 	*pd = RelationGetPartitionDispatchInfo(rel, num_parted, &leaf_parts); | ||||||
| 	*num_partitions = list_length(leaf_parts); | 	*num_partitions = list_length(leaf_parts); | ||||||
| 	*partitions = (ResultRelInfo *) palloc(*num_partitions * | 	*partitions = (ResultRelInfo *) palloc(*num_partitions * | ||||||
| 										   sizeof(ResultRelInfo)); | 										   sizeof(ResultRelInfo)); | ||||||
|   | |||||||
| @@ -87,8 +87,7 @@ extern Expr *get_partition_qual_relid(Oid relid); | |||||||
|  |  | ||||||
| /* For tuple routing */ | /* For tuple routing */ | ||||||
| extern PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel, | extern PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel, | ||||||
| 								 int lockmode, int *num_parted, | 								 int *num_parted, List **leaf_part_oids); | ||||||
| 								 List **leaf_part_oids); |  | ||||||
| extern void FormPartitionKeyDatum(PartitionDispatch pd, | extern void FormPartitionKeyDatum(PartitionDispatch pd, | ||||||
| 					  TupleTableSlot *slot, | 					  TupleTableSlot *slot, | ||||||
| 					  EState *estate, | 					  EState *estate, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user