diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 9111b56e92a..42b6d0a2cb9 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -70,7 +70,7 @@ static Node *convert_testexpr_mutator(Node *node, static bool subplan_is_hashable(Plan *plan); static bool testexpr_is_hashable(Node *testexpr); static bool hash_ok_operator(OpExpr *expr); -static bool simplify_EXISTS_query(Query *query); +static bool simplify_EXISTS_query(PlannerInfo *root, Query *query); static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, Node **testexpr, List **paramIds); static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root); @@ -452,7 +452,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, * If it's an EXISTS subplan, we might be able to simplify it. */ if (subLinkType == EXISTS_SUBLINK) - simple_exists = simplify_EXISTS_query(subquery); + simple_exists = simplify_EXISTS_query(root, subquery); /* * For an EXISTS subplan, tell lower-level planner to expect that only the @@ -518,7 +518,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, /* Make a second copy of the original subquery */ subquery = (Query *) copyObject(orig_subquery); /* and re-simplify */ - simple_exists = simplify_EXISTS_query(subquery); + simple_exists = simplify_EXISTS_query(root, subquery); Assert(simple_exists); /* See if it can be converted to an ANY query */ subquery = convert_EXISTS_to_ANY(root, subquery, @@ -1359,7 +1359,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, * targetlist, we have to fail, because the pullup operation leaves us * with noplace to evaluate the targetlist. */ - if (!simplify_EXISTS_query(subselect)) + if (!simplify_EXISTS_query(root, subselect)) return NULL; /* @@ -1486,13 +1486,14 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, * Returns TRUE if was able to discard the targetlist, else FALSE. */ static bool -simplify_EXISTS_query(Query *query) +simplify_EXISTS_query(PlannerInfo *root, Query *query) { /* * We don't try to simplify at all if the query uses set operations, - * aggregates, modifying CTEs, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; - * none of these seem likely in normal usage and their possible effects - * are complex. + * aggregates, modifying CTEs, HAVING, OFFSET, or FOR UPDATE/SHARE; none + * of these seem likely in normal usage and their possible effects are + * complex. (Note: we could ignore an "OFFSET 0" clause, but that + * traditionally is used as an optimization fence, so we don't.) */ if (query->commandType != CMD_SELECT || query->setOperations || @@ -1501,10 +1502,43 @@ simplify_EXISTS_query(Query *query) query->hasModifyingCTE || query->havingQual || query->limitOffset || - query->limitCount || query->rowMarks) return false; + /* + * LIMIT with a constant positive (or NULL) value doesn't affect the + * semantics of EXISTS, so let's ignore such clauses. This is worth doing + * because people accustomed to certain other DBMSes may be in the habit + * of writing EXISTS(SELECT ... LIMIT 1) as an optimization. If there's a + * LIMIT with anything else as argument, though, we can't simplify. + */ + if (query->limitCount) + { + /* + * The LIMIT clause has not yet been through eval_const_expressions, + * so we have to apply that here. It might seem like this is a waste + * of cycles, since the only case plausibly worth worrying about is + * "LIMIT 1" ... but what we'll actually see is "LIMIT int8(1::int4)", + * so we have to fold constants or we're not going to recognize it. + */ + Node *node = eval_const_expressions(root, query->limitCount); + Const *limit; + + /* Might as well update the query if we simplified the clause. */ + query->limitCount = node; + + if (!IsA(node, Const)) + return false; + + limit = (Const *) node; + Assert(limit->consttype == INT8OID); + if (!limit->constisnull && DatumGetInt64(limit->constvalue) <= 0) + return false; + + /* Whether or not the targetlist is safe, we can drop the LIMIT. */ + query->limitCount = NULL; + } + /* * Mustn't throw away the targetlist if it contains set-returning * functions; those could affect whether zero rows are returned! diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 01c91308f33..b14410fd222 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -221,6 +221,46 @@ from int8_tbl group by q1 order by q1; 4567890123456789 | 0.6 (2 rows) +-- +-- Check EXISTS simplification with LIMIT +-- +explain (costs off) +select * from int4_tbl o where exists + (select 1 from int4_tbl i where i.f1=o.f1 limit null); + QUERY PLAN +------------------------------------ + Hash Semi Join + Hash Cond: (o.f1 = i.f1) + -> Seq Scan on int4_tbl o + -> Hash + -> Seq Scan on int4_tbl i +(5 rows) + +explain (costs off) +select * from int4_tbl o where not exists + (select 1 from int4_tbl i where i.f1=o.f1 limit 1); + QUERY PLAN +------------------------------------ + Hash Anti Join + Hash Cond: (o.f1 = i.f1) + -> Seq Scan on int4_tbl o + -> Hash + -> Seq Scan on int4_tbl i +(5 rows) + +explain (costs off) +select * from int4_tbl o where exists + (select 1 from int4_tbl i where i.f1=o.f1 limit 0); + QUERY PLAN +-------------------------------------- + Seq Scan on int4_tbl o + Filter: (SubPlan 1) + SubPlan 1 + -> Limit + -> Seq Scan on int4_tbl i + Filter: (f1 = o.f1) +(6 rows) + -- -- Test cases to catch unpleasant interactions between IN-join processing -- and subquery pullup. diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 56707e26bbf..4be2e40a000 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -92,6 +92,19 @@ SELECT '' AS eight, ss.f1 AS "Correlated Field", ss.f3 AS "Second Field" select q1, float8(count(*)) / (select count(*) from int8_tbl) from int8_tbl group by q1 order by q1; +-- +-- Check EXISTS simplification with LIMIT +-- +explain (costs off) +select * from int4_tbl o where exists + (select 1 from int4_tbl i where i.f1=o.f1 limit null); +explain (costs off) +select * from int4_tbl o where not exists + (select 1 from int4_tbl i where i.f1=o.f1 limit 1); +explain (costs off) +select * from int4_tbl o where exists + (select 1 from int4_tbl i where i.f1=o.f1 limit 0); + -- -- Test cases to catch unpleasant interactions between IN-join processing -- and subquery pullup.