mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Avoid O(N^2) cost in ExecFindRowMark().
If there are many ExecRowMark structs, we spent O(N^2) time in ExecFindRowMark during executor startup. Once upon a time this was not of great concern, but the addition of native partitioning has squeezed out enough other costs that this can become the dominant overhead in some use-cases for tables with many partitions.

To fix, simply replace that List data structure with an array.

This adds a little bit of cost to execCurrentOf(), but not much, and anyway that code path is neither of large importance nor very efficient now. If we ever decide it is a bottleneck, constructing a hash table for lookup-by-tableoid would likely be the thing to do.

Per complaint from Amit Langote, though this is different from his fix proposal.

Discussion: https://postgr.es/m/468c85d9-540e-66a2-1dde-fec2b741e688@lab.ntt.co.jp
This commit is contained in:
		| @@ -91,21 +91,22 @@ execCurrentOf(CurrentOfExpr *cexpr, | |||||||
| 	 * the other code can't, while the non-FOR-UPDATE case allows use of WHERE | 	 * the other code can't, while the non-FOR-UPDATE case allows use of WHERE | ||||||
| 	 * CURRENT OF with an insensitive cursor. | 	 * CURRENT OF with an insensitive cursor. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (queryDesc->estate->es_rowMarks) | 	if (queryDesc->estate->es_rowmarks) | ||||||
| 	{ | 	{ | ||||||
| 		ExecRowMark *erm; | 		ExecRowMark *erm; | ||||||
| 		ListCell   *lc; | 		Index		i; | ||||||
|  |  | ||||||
| 		/* | 		/* | ||||||
| 		 * Here, the query must have exactly one FOR UPDATE/SHARE reference to | 		 * Here, the query must have exactly one FOR UPDATE/SHARE reference to | ||||||
| 		 * the target table, and we dig the ctid info out of that. | 		 * the target table, and we dig the ctid info out of that. | ||||||
| 		 */ | 		 */ | ||||||
| 		erm = NULL; | 		erm = NULL; | ||||||
| 		foreach(lc, queryDesc->estate->es_rowMarks) | 		for (i = 0; i < queryDesc->estate->es_range_table_size; i++) | ||||||
| 		{ | 		{ | ||||||
| 			ExecRowMark *thiserm = (ExecRowMark *) lfirst(lc); | 			ExecRowMark *thiserm = queryDesc->estate->es_rowmarks[i]; | ||||||
|  |  | ||||||
| 			if (!RowMarkRequiresRowShareLock(thiserm->markType)) | 			if (thiserm == NULL || | ||||||
|  | 				!RowMarkRequiresRowShareLock(thiserm->markType)) | ||||||
| 				continue;		/* ignore non-FOR UPDATE/SHARE items */ | 				continue;		/* ignore non-FOR UPDATE/SHARE items */ | ||||||
|  |  | ||||||
| 			if (thiserm->relid == table_oid) | 			if (thiserm->relid == table_oid) | ||||||
|   | |||||||
| @@ -909,9 +909,12 @@ InitPlan(QueryDesc *queryDesc, int eflags) | |||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * Next, build the ExecRowMark list from the PlanRowMark(s), if any. | 	 * Next, build the ExecRowMark array from the PlanRowMark(s), if any. | ||||||
| 	 */ | 	 */ | ||||||
| 	estate->es_rowMarks = NIL; | 	if (plannedstmt->rowMarks) | ||||||
|  | 	{ | ||||||
|  | 		estate->es_rowmarks = (ExecRowMark **) | ||||||
|  | 			palloc0(estate->es_range_table_size * sizeof(ExecRowMark *)); | ||||||
| 		foreach(l, plannedstmt->rowMarks) | 		foreach(l, plannedstmt->rowMarks) | ||||||
| 		{ | 		{ | ||||||
| 			PlanRowMark *rc = (PlanRowMark *) lfirst(l); | 			PlanRowMark *rc = (PlanRowMark *) lfirst(l); | ||||||
| @@ -963,7 +966,11 @@ InitPlan(QueryDesc *queryDesc, int eflags) | |||||||
| 			ItemPointerSetInvalid(&(erm->curCtid)); | 			ItemPointerSetInvalid(&(erm->curCtid)); | ||||||
| 			erm->ermExtra = NULL; | 			erm->ermExtra = NULL; | ||||||
|  |  | ||||||
| 		estate->es_rowMarks = lappend(estate->es_rowMarks, erm); | 			Assert(erm->rti > 0 && erm->rti <= estate->es_range_table_size && | ||||||
|  | 				   estate->es_rowmarks[erm->rti - 1] == NULL); | ||||||
|  |  | ||||||
|  | 			estate->es_rowmarks[erm->rti - 1] = erm; | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| @@ -2394,13 +2401,12 @@ ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo) | |||||||
| ExecRowMark * | ExecRowMark * | ||||||
| ExecFindRowMark(EState *estate, Index rti, bool missing_ok) | ExecFindRowMark(EState *estate, Index rti, bool missing_ok) | ||||||
| { | { | ||||||
| 	ListCell   *lc; | 	if (rti > 0 && rti <= estate->es_range_table_size && | ||||||
|  | 		estate->es_rowmarks != NULL) | ||||||
| 	foreach(lc, estate->es_rowMarks) |  | ||||||
| 	{ | 	{ | ||||||
| 		ExecRowMark *erm = (ExecRowMark *) lfirst(lc); | 		ExecRowMark *erm = estate->es_rowmarks[rti - 1]; | ||||||
|  |  | ||||||
| 		if (erm->rti == rti) | 		if (erm) | ||||||
| 			return erm; | 			return erm; | ||||||
| 	} | 	} | ||||||
| 	if (!missing_ok) | 	if (!missing_ok) | ||||||
| @@ -3131,6 +3137,7 @@ EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree) | |||||||
| 	estate->es_range_table_array = parentestate->es_range_table_array; | 	estate->es_range_table_array = parentestate->es_range_table_array; | ||||||
| 	estate->es_range_table_size = parentestate->es_range_table_size; | 	estate->es_range_table_size = parentestate->es_range_table_size; | ||||||
| 	estate->es_relations = parentestate->es_relations; | 	estate->es_relations = parentestate->es_relations; | ||||||
|  | 	estate->es_rowmarks = parentestate->es_rowmarks; | ||||||
| 	estate->es_plannedstmt = parentestate->es_plannedstmt; | 	estate->es_plannedstmt = parentestate->es_plannedstmt; | ||||||
| 	estate->es_junkFilter = parentestate->es_junkFilter; | 	estate->es_junkFilter = parentestate->es_junkFilter; | ||||||
| 	estate->es_output_cid = parentestate->es_output_cid; | 	estate->es_output_cid = parentestate->es_output_cid; | ||||||
| @@ -3148,7 +3155,6 @@ EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree) | |||||||
| 	} | 	} | ||||||
| 	/* es_result_relation_info must NOT be copied */ | 	/* es_result_relation_info must NOT be copied */ | ||||||
| 	/* es_trig_target_relations must NOT be copied */ | 	/* es_trig_target_relations must NOT be copied */ | ||||||
| 	estate->es_rowMarks = parentestate->es_rowMarks; |  | ||||||
| 	estate->es_top_eflags = parentestate->es_top_eflags; | 	estate->es_top_eflags = parentestate->es_top_eflags; | ||||||
| 	estate->es_instrument = parentestate->es_instrument; | 	estate->es_instrument = parentestate->es_instrument; | ||||||
| 	/* es_auxmodifytables must NOT be copied */ | 	/* es_auxmodifytables must NOT be copied */ | ||||||
|   | |||||||
| @@ -113,6 +113,7 @@ CreateExecutorState(void) | |||||||
| 	estate->es_range_table_array = NULL; | 	estate->es_range_table_array = NULL; | ||||||
| 	estate->es_range_table_size = 0; | 	estate->es_range_table_size = 0; | ||||||
| 	estate->es_relations = NULL; | 	estate->es_relations = NULL; | ||||||
|  | 	estate->es_rowmarks = NULL; | ||||||
| 	estate->es_plannedstmt = NULL; | 	estate->es_plannedstmt = NULL; | ||||||
|  |  | ||||||
| 	estate->es_junkFilter = NULL; | 	estate->es_junkFilter = NULL; | ||||||
| @@ -142,8 +143,6 @@ CreateExecutorState(void) | |||||||
|  |  | ||||||
| 	estate->es_tupleTable = NIL; | 	estate->es_tupleTable = NIL; | ||||||
|  |  | ||||||
| 	estate->es_rowMarks = NIL; |  | ||||||
|  |  | ||||||
| 	estate->es_processed = 0; | 	estate->es_processed = 0; | ||||||
| 	estate->es_lastoid = InvalidOid; | 	estate->es_lastoid = InvalidOid; | ||||||
|  |  | ||||||
| @@ -709,6 +708,12 @@ ExecInitRangeTable(EState *estate, List *rangeTable) | |||||||
| 	 */ | 	 */ | ||||||
| 	estate->es_relations = (Relation *) | 	estate->es_relations = (Relation *) | ||||||
| 		palloc0(estate->es_range_table_size * sizeof(Relation)); | 		palloc0(estate->es_range_table_size * sizeof(Relation)); | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * es_rowmarks is also parallel to the es_range_table_array, but it's | ||||||
|  | 	 * allocated only if needed. | ||||||
|  | 	 */ | ||||||
|  | 	estate->es_rowmarks = NULL; | ||||||
| } | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
|   | |||||||
| @@ -34,6 +34,7 @@ | |||||||
|  |  | ||||||
| struct PlanState;				/* forward references in this file */ | struct PlanState;				/* forward references in this file */ | ||||||
| struct ParallelHashJoinState; | struct ParallelHashJoinState; | ||||||
|  | struct ExecRowMark; | ||||||
| struct ExprState; | struct ExprState; | ||||||
| struct ExprContext; | struct ExprContext; | ||||||
| struct RangeTblEntry;			/* avoid including parsenodes.h here */ | struct RangeTblEntry;			/* avoid including parsenodes.h here */ | ||||||
| @@ -491,6 +492,8 @@ typedef struct EState | |||||||
| 	Index		es_range_table_size;	/* size of the range table arrays */ | 	Index		es_range_table_size;	/* size of the range table arrays */ | ||||||
| 	Relation   *es_relations;	/* Array of per-range-table-entry Relation | 	Relation   *es_relations;	/* Array of per-range-table-entry Relation | ||||||
| 								 * pointers, or NULL if not yet opened */ | 								 * pointers, or NULL if not yet opened */ | ||||||
|  | 	struct ExecRowMark **es_rowmarks;	/* Array of per-range-table-entry | ||||||
|  | 										 * ExecRowMarks, or NULL if none */ | ||||||
| 	PlannedStmt *es_plannedstmt;	/* link to top of plan tree */ | 	PlannedStmt *es_plannedstmt;	/* link to top of plan tree */ | ||||||
| 	const char *es_sourceText;	/* Source text from QueryDesc */ | 	const char *es_sourceText;	/* Source text from QueryDesc */ | ||||||
|  |  | ||||||
| @@ -537,8 +540,6 @@ typedef struct EState | |||||||
|  |  | ||||||
| 	List	   *es_tupleTable;	/* List of TupleTableSlots */ | 	List	   *es_tupleTable;	/* List of TupleTableSlots */ | ||||||
|  |  | ||||||
| 	List	   *es_rowMarks;	/* List of ExecRowMarks */ |  | ||||||
|  |  | ||||||
| 	uint64		es_processed;	/* # of tuples processed */ | 	uint64		es_processed;	/* # of tuples processed */ | ||||||
| 	Oid			es_lastoid;		/* last oid processed (by INSERT) */ | 	Oid			es_lastoid;		/* last oid processed (by INSERT) */ | ||||||
|  |  | ||||||
| @@ -607,7 +608,9 @@ typedef struct EState | |||||||
|  * node that sources the relation (e.g., for a foreign table the FDW can use |  * node that sources the relation (e.g., for a foreign table the FDW can use | ||||||
|  * ermExtra to hold information). |  * ermExtra to hold information). | ||||||
|  * |  * | ||||||
|  * EState->es_rowMarks is a list of these structs. |  * EState->es_rowmarks is an array of these structs, indexed by RT index, | ||||||
|  |  * with NULLs for irrelevant RT indexes.  es_rowmarks itself is NULL if | ||||||
|  |  * there are no rowmarks. | ||||||
|  */ |  */ | ||||||
| typedef struct ExecRowMark | typedef struct ExecRowMark | ||||||
| { | { | ||||||
| @@ -629,7 +632,7 @@ typedef struct ExecRowMark | |||||||
|  *	   additional runtime representation of FOR [KEY] UPDATE/SHARE clauses |  *	   additional runtime representation of FOR [KEY] UPDATE/SHARE clauses | ||||||
|  * |  * | ||||||
|  * Each LockRows and ModifyTable node keeps a list of the rowmarks it needs to |  * Each LockRows and ModifyTable node keeps a list of the rowmarks it needs to | ||||||
|  * deal with.  In addition to a pointer to the related entry in es_rowMarks, |  * deal with.  In addition to a pointer to the related entry in es_rowmarks, | ||||||
|  * this struct carries the column number(s) of the resjunk columns associated |  * this struct carries the column number(s) of the resjunk columns associated | ||||||
|  * with the rowmark (see comments for PlanRowMark for more detail).  In the |  * with the rowmark (see comments for PlanRowMark for more detail).  In the | ||||||
|  * case of ModifyTable, there has to be a separate ExecAuxRowMark list for |  * case of ModifyTable, there has to be a separate ExecAuxRowMark list for | ||||||
| @@ -638,7 +641,7 @@ typedef struct ExecRowMark | |||||||
|  */ |  */ | ||||||
| typedef struct ExecAuxRowMark | typedef struct ExecAuxRowMark | ||||||
| { | { | ||||||
| 	ExecRowMark *rowmark;		/* related entry in es_rowMarks */ | 	ExecRowMark *rowmark;		/* related entry in es_rowmarks */ | ||||||
| 	AttrNumber	ctidAttNo;		/* resno of ctid junk attribute, if any */ | 	AttrNumber	ctidAttNo;		/* resno of ctid junk attribute, if any */ | ||||||
| 	AttrNumber	toidAttNo;		/* resno of tableoid junk attribute, if any */ | 	AttrNumber	toidAttNo;		/* resno of tableoid junk attribute, if any */ | ||||||
| 	AttrNumber	wholeAttNo;		/* resno of whole-row junk attribute, if any */ | 	AttrNumber	wholeAttNo;		/* resno of whole-row junk attribute, if any */ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user