mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Fix postgres_fdw to check shippability of sort clauses properly.
postgres_fdw would push ORDER BY clauses to the remote side without
verifying that the sort operator is safe to ship.  Moreover, it failed
to print a suitable USING clause if the sort operator isn't default
for the sort expression's type.  The net result of this is that the
remote sort might not have anywhere near the semantics we expect,
which'd be disastrous for locally-performed merge joins in particular.
We addressed similar issues in the context of ORDER BY within an
aggregate function call in commit 7012b132d, but failed to notice
that query-level ORDER BY was broken.  Thus, much of the necessary
logic already existed, but it requires refactoring to be usable
in both cases.
Back-patch to all supported branches.  In HEAD only, remove the
core code's copy of find_em_expr_for_rel, which is no longer used
and really should never have been pushed into equivclass.c in the
first place.
Ronan Dunklau, per report from David Rowley;
reviews by David Rowley, Ranier Vilela, and myself
Discussion: https://postgr.es/m/CAApHDvr4OeC2DBVY--zVP83-K=bYrTD7F8SZDhN4g+pj2f2S-A@mail.gmail.com
			
			
This commit is contained in:
		| @@ -42,6 +42,7 @@ | ||||
| #include "catalog/pg_collation.h" | ||||
| #include "catalog/pg_namespace.h" | ||||
| #include "catalog/pg_operator.h" | ||||
| #include "catalog/pg_opfamily.h" | ||||
| #include "catalog/pg_proc.h" | ||||
| #include "catalog/pg_type.h" | ||||
| #include "commands/defrem.h" | ||||
| @@ -179,6 +180,8 @@ static void deparseRangeTblRef(StringInfo buf, PlannerInfo *root, | ||||
| 				   Index ignore_rel, List **ignore_conds, List **params_list); | ||||
| static void deparseAggref(Aggref *node, deparse_expr_cxt *context); | ||||
| static void appendGroupByClause(List *tlist, deparse_expr_cxt *context); | ||||
| static void appendOrderBySuffix(Oid sortop, Oid sortcoltype, bool nulls_first, | ||||
| 								deparse_expr_cxt *context); | ||||
| static void appendAggOrderBy(List *orderList, List *targetList, | ||||
| 				 deparse_expr_cxt *context); | ||||
| static void appendFunctionName(Oid funcid, deparse_expr_cxt *context); | ||||
| @@ -905,6 +908,33 @@ is_foreign_param(PlannerInfo *root, | ||||
| 	return false; | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Returns true if it's safe to push down the sort expression described by | ||||
|  * 'pathkey' to the foreign server. | ||||
|  * | ||||
|  * 'root' is passed through to find_em_for_rel. | ||||
|  * 'baserel' is the foreign relation; its fdw_private field is read as a | ||||
|  * PgFdwRelationInfo and handed to is_shippable. | ||||
|  * 'pathkey' supplies the equivalence class and the opfamily to be checked. | ||||
|  * | ||||
|  * The result is false if the pathkey's equivalence class is marked as | ||||
|  * containing volatile expressions, or if the pathkey's opfamily is not | ||||
|  * shippable.  Otherwise the result is true exactly when find_em_for_rel | ||||
|  * returns a non-NULL equivalence class member for 'baserel'. | ||||
|  */ | ||||
| bool | ||||
| is_foreign_pathkey(PlannerInfo *root, | ||||
| 				   RelOptInfo *baserel, | ||||
| 				   PathKey *pathkey) | ||||
| { | ||||
| 	EquivalenceClass *pathkey_ec = pathkey->pk_eclass; | ||||
| 	PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private; | ||||
|  | ||||
| 	/* | ||||
| 	 * is_foreign_expr would detect volatile expressions as well, but checking | ||||
| 	 * ec_has_volatile here saves some cycles. | ||||
| 	 */ | ||||
| 	if (pathkey_ec->ec_has_volatile) | ||||
| 		return false; | ||||
|  | ||||
| 	/* can't push down the sort if the pathkey's opfamily is not shippable */ | ||||
| 	if (!is_shippable(pathkey->pk_opfamily, OperatorFamilyRelationId, fpinfo)) | ||||
| 		return false; | ||||
|  | ||||
| 	/* | ||||
| 	 * can push if a suitable EC member exists; only the existence of such a | ||||
| 	 * member matters here, the member itself is not returned to the caller | ||||
| 	 */ | ||||
| 	return (find_em_for_rel(root, pathkey_ec, baserel) != NULL); | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Convert type OID + typmod info into a type name we can ship to the remote | ||||
|  * server.  Someplace else had better have verified that this type name is | ||||
| @@ -3054,22 +3084,38 @@ appendAggOrderBy(List *orderList, List *targetList, deparse_expr_cxt *context) | ||||
| 	{ | ||||
| 		SortGroupClause *srt = (SortGroupClause *) lfirst(lc); | ||||
| 		Node	   *sortexpr; | ||||
| 		Oid			sortcoltype; | ||||
| 		TypeCacheEntry *typentry; | ||||
|  | ||||
| 		if (!first) | ||||
| 			appendStringInfoString(buf, ", "); | ||||
| 		first = false; | ||||
|  | ||||
| 		/* Deparse the sort expression proper. */ | ||||
| 		sortexpr = deparseSortGroupClause(srt->tleSortGroupRef, targetList, | ||||
| 										  false, context); | ||||
| 		sortcoltype = exprType(sortexpr); | ||||
| 		/* See whether operator is default < or > for datatype */ | ||||
| 		/* Add decoration as needed. */ | ||||
| 		appendOrderBySuffix(srt->sortop, exprType(sortexpr), srt->nulls_first, | ||||
| 							context); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Append the ASC, DESC, USING <OPERATOR> and NULLS FIRST / NULLS LAST parts | ||||
|  * of an ORDER BY clause. | ||||
|  */ | ||||
| static void | ||||
| appendOrderBySuffix(Oid sortop, Oid sortcoltype, bool nulls_first, | ||||
| 					deparse_expr_cxt *context) | ||||
| { | ||||
| 	StringInfo	buf = context->buf; | ||||
| 	TypeCacheEntry *typentry; | ||||
|  | ||||
| 	/* See whether operator is default < or > for sort expr's datatype. */ | ||||
| 	typentry = lookup_type_cache(sortcoltype, | ||||
| 								 TYPECACHE_LT_OPR | TYPECACHE_GT_OPR); | ||||
| 		if (srt->sortop == typentry->lt_opr) | ||||
|  | ||||
| 	if (sortop == typentry->lt_opr) | ||||
| 		appendStringInfoString(buf, " ASC"); | ||||
| 		else if (srt->sortop == typentry->gt_opr) | ||||
| 	else if (sortop == typentry->gt_opr) | ||||
| 		appendStringInfoString(buf, " DESC"); | ||||
| 	else | ||||
| 	{ | ||||
| @@ -3079,19 +3125,18 @@ appendAggOrderBy(List *orderList, List *targetList, deparse_expr_cxt *context) | ||||
| 		appendStringInfoString(buf, " USING "); | ||||
|  | ||||
| 		/* Append operator name. */ | ||||
| 			opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(srt->sortop)); | ||||
| 		opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(sortop)); | ||||
| 		if (!HeapTupleIsValid(opertup)) | ||||
| 				elog(ERROR, "cache lookup failed for operator %u", srt->sortop); | ||||
| 			elog(ERROR, "cache lookup failed for operator %u", sortop); | ||||
| 		operform = (Form_pg_operator) GETSTRUCT(opertup); | ||||
| 		deparseOperatorName(buf, operform); | ||||
| 		ReleaseSysCache(opertup); | ||||
| 	} | ||||
|  | ||||
| 		if (srt->nulls_first) | ||||
| 	if (nulls_first) | ||||
| 		appendStringInfoString(buf, " NULLS FIRST"); | ||||
| 	else | ||||
| 		appendStringInfoString(buf, " NULLS LAST"); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| /* | ||||
| @@ -3174,17 +3219,17 @@ appendGroupByClause(List *tlist, deparse_expr_cxt *context) | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Deparse ORDER BY clause according to the given pathkeys for given base | ||||
|  * relation. From given pathkeys expressions belonging entirely to the given | ||||
|  * base relation are obtained and deparsed. | ||||
|  * Deparse ORDER BY clause defined by the given pathkeys. | ||||
|  * | ||||
|  * We find a suitable pathkey expression (some earlier step | ||||
|  * should have verified that there is one) and deparse it. | ||||
|  */ | ||||
| static void | ||||
| appendOrderByClause(List *pathkeys, deparse_expr_cxt *context) | ||||
| { | ||||
| 	ListCell   *lcell; | ||||
| 	int			nestlevel; | ||||
| 	char	   *delim = " "; | ||||
| 	RelOptInfo *baserel = context->scanrel; | ||||
| 	const char *delim = " "; | ||||
| 	StringInfo	buf = context->buf; | ||||
|  | ||||
| 	/* Make sure any constants in the exprs are printed portably */ | ||||
| @@ -3194,22 +3239,47 @@ appendOrderByClause(List *pathkeys, deparse_expr_cxt *context) | ||||
| 	foreach(lcell, pathkeys) | ||||
| 	{ | ||||
| 		PathKey    *pathkey = lfirst(lcell); | ||||
| 		EquivalenceMember *em; | ||||
| 		Expr	   *em_expr; | ||||
| 		Oid			oprid; | ||||
|  | ||||
| 		em_expr = find_em_expr_for_rel(pathkey->pk_eclass, baserel); | ||||
| 		Assert(em_expr != NULL); | ||||
| 		em = find_em_for_rel(context->root, | ||||
| 							 pathkey->pk_eclass, | ||||
| 							 context->scanrel); | ||||
|  | ||||
| 		/* | ||||
| 		 * We don't expect any error here; it would mean that shippability | ||||
| 		 * wasn't verified earlier.  For the same reason, we don't recheck | ||||
| 		 * shippability of the sort operator. | ||||
| 		 */ | ||||
| 		if (em == NULL) | ||||
| 			elog(ERROR, "could not find pathkey item to sort"); | ||||
|  | ||||
| 		em_expr = em->em_expr; | ||||
|  | ||||
| 		/* | ||||
| 		 * Look up the operator corresponding to the strategy in the opfamily. | ||||
| 		 * The datatype used by the opfamily is not necessarily the same as | ||||
| 		 * the expression type (for array types for example). | ||||
| 		 */ | ||||
| 		oprid = get_opfamily_member(pathkey->pk_opfamily, | ||||
| 									em->em_datatype, | ||||
| 									em->em_datatype, | ||||
| 									pathkey->pk_strategy); | ||||
| 		if (!OidIsValid(oprid)) | ||||
| 			elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", | ||||
| 				 pathkey->pk_strategy, em->em_datatype, em->em_datatype, | ||||
| 				 pathkey->pk_opfamily); | ||||
|  | ||||
| 		appendStringInfoString(buf, delim); | ||||
| 		deparseExpr(em_expr, context); | ||||
| 		if (pathkey->pk_strategy == BTLessStrategyNumber) | ||||
| 			appendStringInfoString(buf, " ASC"); | ||||
| 		else | ||||
| 			appendStringInfoString(buf, " DESC"); | ||||
|  | ||||
| 		if (pathkey->pk_nulls_first) | ||||
| 			appendStringInfoString(buf, " NULLS FIRST"); | ||||
| 		else | ||||
| 			appendStringInfoString(buf, " NULLS LAST"); | ||||
| 		/* | ||||
| 		 * Here we need to use the expression's actual type to discover | ||||
| 		 * whether the desired operator will be the default or not. | ||||
| 		 */ | ||||
| 		appendOrderBySuffix(oprid, exprType((Node *) em_expr), | ||||
| 							pathkey->pk_nulls_first, context); | ||||
|  | ||||
| 		delim = ", "; | ||||
| 	} | ||||
|   | ||||
| @@ -3265,6 +3265,19 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 | ||||
|          Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6)) | ||||
| (6 rows) | ||||
|  | ||||
| -- This should not be pushed either. | ||||
| explain (verbose, costs off) | ||||
| select * from ft2 order by c1 using operator(public.<^); | ||||
|                                   QUERY PLAN                                    | ||||
| ------------------------------------------------------------------------------- | ||||
|  Sort | ||||
|    Output: c1, c2, c3, c4, c5, c6, c7, c8 | ||||
|    Sort Key: ft2.c1 USING <^ | ||||
|    ->  Foreign Scan on public.ft2 | ||||
|          Output: c1, c2, c3, c4, c5, c6, c7, c8 | ||||
|          Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" | ||||
| (6 rows) | ||||
|  | ||||
| -- Add into extension | ||||
| alter extension postgres_fdw add operator class my_op_class using btree; | ||||
| alter extension postgres_fdw add function my_op_cmp(a int, b int); | ||||
| @@ -3290,6 +3303,16 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 | ||||
|  {6,16,26,36,46,56,66,76,86,96} | ||||
| (1 row) | ||||
|  | ||||
| -- This should be pushed too. | ||||
| explain (verbose, costs off) | ||||
| select * from ft2 order by c1 using operator(public.<^); | ||||
|                                                          QUERY PLAN                                                           | ||||
| ----------------------------------------------------------------------------------------------------------------------------- | ||||
|  Foreign Scan on public.ft2 | ||||
|    Output: c1, c2, c3, c4, c5, c6, c7, c8 | ||||
|    Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY "C 1" USING OPERATOR(public.<^) NULLS LAST | ||||
| (3 rows) | ||||
|  | ||||
| -- Remove from extension | ||||
| alter extension postgres_fdw drop operator class my_op_class using btree; | ||||
| alter extension postgres_fdw drop function my_op_cmp(a int, b int); | ||||
|   | ||||
| @@ -17,6 +17,7 @@ | ||||
| #include "access/htup_details.h" | ||||
| #include "access/sysattr.h" | ||||
| #include "catalog/pg_class.h" | ||||
| #include "catalog/pg_opfamily.h" | ||||
| #include "commands/defrem.h" | ||||
| #include "commands/explain.h" | ||||
| #include "commands/vacuum.h" | ||||
| @@ -825,8 +826,6 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) | ||||
| 		foreach(lc, root->query_pathkeys) | ||||
| 		{ | ||||
| 			PathKey    *pathkey = (PathKey *) lfirst(lc); | ||||
| 			EquivalenceClass *pathkey_ec = pathkey->pk_eclass; | ||||
| 			Expr	   *em_expr; | ||||
|  | ||||
| 			/* | ||||
| 			 * The planner and executor don't have any clever strategy for | ||||
| @@ -834,13 +833,8 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) | ||||
| 			 * getting it to be sorted by all of those pathkeys. We'll just | ||||
| 			 * end up resorting the entire data set.  So, unless we can push | ||||
| 			 * down all of the query pathkeys, forget it. | ||||
| 			 * | ||||
| 			 * is_foreign_expr would detect volatile expressions as well, but | ||||
| 			 * checking ec_has_volatile here saves some cycles. | ||||
| 			 */ | ||||
| 			if (pathkey_ec->ec_has_volatile || | ||||
| 				!(em_expr = find_em_expr_for_rel(pathkey_ec, rel)) || | ||||
| 				!is_foreign_expr(root, rel, em_expr)) | ||||
| 			if (!is_foreign_pathkey(root, rel, pathkey)) | ||||
| 			{ | ||||
| 				query_pathkeys_ok = false; | ||||
| 				break; | ||||
| @@ -884,16 +878,19 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) | ||||
| 	foreach(lc, useful_eclass_list) | ||||
| 	{ | ||||
| 		EquivalenceClass *cur_ec = lfirst(lc); | ||||
| 		Expr	   *em_expr; | ||||
| 		PathKey    *pathkey; | ||||
|  | ||||
| 		/* If redundant with what we did above, skip it. */ | ||||
| 		if (cur_ec == query_ec) | ||||
| 			continue; | ||||
|  | ||||
| 		/* Can't push down the sort if the EC's opfamily is not shippable. */ | ||||
| 		if (!is_shippable(linitial_oid(cur_ec->ec_opfamilies), | ||||
| 						  OperatorFamilyRelationId, fpinfo)) | ||||
| 			continue; | ||||
|  | ||||
| 		/* If no pushable expression for this rel, skip it. */ | ||||
| 		em_expr = find_em_expr_for_rel(cur_ec, rel); | ||||
| 		if (em_expr == NULL || !is_foreign_expr(root, rel, em_expr)) | ||||
| 		if (find_em_for_rel(root, cur_ec, rel) == NULL) | ||||
| 			continue; | ||||
|  | ||||
| 		/* Looks like we can generate a pathkey, so let's do it. */ | ||||
| @@ -5916,30 +5913,35 @@ conversion_error_callback(void *arg) | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Find an equivalence class member expression, all of whose Vars, come from | ||||
|  * the indicated relation. | ||||
|  * Given an EquivalenceClass and a foreign relation, find an EC member | ||||
|  * that can be used to sort the relation remotely according to a pathkey | ||||
|  * using this EC. | ||||
|  * | ||||
|  * If there is more than one suitable candidate, return an arbitrary | ||||
|  * one of them.  If there is none, return NULL. | ||||
|  * | ||||
|  * This checks that the EC member expression uses only Vars from the given | ||||
|  * rel and is shippable.  Caller must separately verify that the pathkey's | ||||
|  * ordering operator is shippable. | ||||
|  */ | ||||
| Expr * | ||||
| find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) | ||||
| EquivalenceMember * | ||||
| find_em_for_rel(PlannerInfo *root, EquivalenceClass *ec, RelOptInfo *rel) | ||||
| { | ||||
| 	ListCell   *lc_em; | ||||
| 	ListCell   *lc; | ||||
|  | ||||
| 	foreach(lc_em, ec->ec_members) | ||||
| 	foreach(lc, ec->ec_members) | ||||
| 	{ | ||||
| 		EquivalenceMember *em = lfirst(lc_em); | ||||
| 		EquivalenceMember *em = (EquivalenceMember *) lfirst(lc); | ||||
|  | ||||
| 		if (bms_is_subset(em->em_relids, rel->relids) && | ||||
| 			!bms_is_empty(em->em_relids)) | ||||
| 		{ | ||||
| 		/* | ||||
| 			 * If there is more than one equivalence member whose Vars are | ||||
| 			 * taken entirely from this relation, we'll be content to choose | ||||
| 			 * any one of those. | ||||
| 		 * Note we require !bms_is_empty, else we'd accept constant | ||||
| 		 * expressions which are not suitable for the purpose. | ||||
| 		 */ | ||||
| 			return em->em_expr; | ||||
| 		} | ||||
| 		if (bms_is_subset(em->em_relids, rel->relids) && | ||||
| 			!bms_is_empty(em->em_relids) && | ||||
| 			is_foreign_expr(root, rel, em->em_expr)) | ||||
| 			return em; | ||||
| 	} | ||||
|  | ||||
| 	/* We didn't find any suitable equivalence class expression */ | ||||
| 	return NULL; | ||||
| } | ||||
|   | ||||
| @@ -143,6 +143,9 @@ extern bool is_foreign_expr(PlannerInfo *root, | ||||
| extern bool is_foreign_param(PlannerInfo *root, | ||||
| 				 RelOptInfo *baserel, | ||||
| 				 Expr *expr); | ||||
| extern bool is_foreign_pathkey(PlannerInfo *root, | ||||
| 							   RelOptInfo *baserel, | ||||
| 							   PathKey *pathkey); | ||||
| extern void deparseInsertSql(StringInfo buf, RangeTblEntry *rte, | ||||
| 				 Index rtindex, Relation rel, | ||||
| 				 List *targetAttrs, bool doNothing, List *returningList, | ||||
| @@ -175,7 +178,9 @@ extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel); | ||||
| extern void deparseAnalyzeSql(StringInfo buf, Relation rel, | ||||
| 				  List **retrieved_attrs); | ||||
| extern void deparseStringLiteral(StringInfo buf, const char *val); | ||||
| extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel); | ||||
| extern EquivalenceMember *find_em_for_rel(PlannerInfo *root, | ||||
| 										  EquivalenceClass *ec, | ||||
| 										  RelOptInfo *rel); | ||||
| extern List *build_tlist_to_deparse(RelOptInfo *foreignrel); | ||||
| extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, | ||||
| 						RelOptInfo *foreignrel, List *tlist, | ||||
|   | ||||
| @@ -825,6 +825,10 @@ create operator class my_op_class for type int using btree family my_op_family a | ||||
| explain (verbose, costs off) | ||||
| select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2; | ||||
|  | ||||
| -- This should not be pushed either. | ||||
| explain (verbose, costs off) | ||||
| select * from ft2 order by c1 using operator(public.<^); | ||||
|  | ||||
| -- Add into extension | ||||
| alter extension postgres_fdw add operator class my_op_class using btree; | ||||
| alter extension postgres_fdw add function my_op_cmp(a int, b int); | ||||
| @@ -839,6 +843,10 @@ explain (verbose, costs off) | ||||
| select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2; | ||||
| select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2; | ||||
|  | ||||
| -- This should be pushed too. | ||||
| explain (verbose, costs off) | ||||
| select * from ft2 order by c1 using operator(public.<^); | ||||
|  | ||||
| -- Remove from extension | ||||
| alter extension postgres_fdw drop operator class my_op_class using btree; | ||||
| alter extension postgres_fdw drop function my_op_cmp(a int, b int); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user