diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 49f81845c92..bbc77db4e25 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.196 2008/08/25 22:42:32 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.197 2008/09/05 21:07:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1584,6 +1584,19 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) * innerstartsel * rescanratio; run_cost += (sort_path.total_cost - sort_path.startup_cost) * (innerendsel - innerstartsel) * rescanratio; + + /* + * If the inner sort is expected to spill to disk, we want to add a + * materialize node to shield it from the need to handle mark/restore. + * This will allow it to perform the last merge pass on-the-fly, while + * in most cases not requiring the materialize to spill to disk. + * Charge an extra cpu_tuple_cost per tuple to account for the + * materialize node. (Keep this estimate in sync with similar ones in + * create_mergejoin_path and create_mergejoin_plan.) + */ + if (relation_byte_size(inner_path_rows, inner_path->parent->width) > + (work_mem * 1024L)) + run_cost += cpu_tuple_cost * inner_path_rows; } else { diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 1a79c96dba4..1195024b5fc 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.247 2008/08/28 23:09:46 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.248 2008/09/05 21:07:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1545,7 +1545,8 @@ create_mergejoin_plan(PlannerInfo *root, /* * We assume the materialize will not spill to disk, and therefore - * charge just cpu_tuple_cost per tuple. + * charge just cpu_tuple_cost per tuple. (Keep this estimate in sync + * with similar ones in cost_mergejoin and create_mergejoin_path.) */ copy_plan_costsize(matplan, inner_plan); matplan->total_cost += cpu_tuple_cost * matplan->plan_rows; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 5996af2b5d5..8e32895fb27 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.146 2008/08/14 18:47:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.147 2008/09/05 21:07:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1304,19 +1304,43 @@ create_mergejoin_path(PlannerInfo *root, /* * If we are not sorting the inner path, we may need a materialize node to - * ensure it can be marked/restored. (Sort does support mark/restore, so - * no materialize is needed in that case.) + * ensure it can be marked/restored. * * Since the inner side must be ordered, and only Sorts and IndexScans can - * create order to begin with, you might think there's no problem --- but - * you'd be wrong. Nestloop and merge joins can *preserve* the order of - * their inputs, so they can be selected as the input of a mergejoin, and - * they don't support mark/restore at present. + * create order to begin with, and they both support mark/restore, you + * might think there's no problem --- but you'd be wrong. Nestloop and + * merge joins can *preserve* the order of their inputs, so they can be + * selected as the input of a mergejoin, and they don't support + * mark/restore at present. + * + * Note: Sort supports mark/restore, so no materialize is really needed + * in that case; but one may be desirable anyway to optimize the sort. + * However, since we aren't representing the sort step separately in + * the Path tree, we can't explicitly represent the materialize either. + * So that case is not handled here. Instead, cost_mergejoin has to + * factor in the cost and create_mergejoin_plan has to add the plan node. */ if (innersortkeys == NIL && !ExecSupportsMarkRestore(inner_path->pathtype)) - inner_path = (Path *) - create_material_path(inner_path->parent, inner_path); + { + Path *mpath; + + mpath = (Path *) create_material_path(inner_path->parent, inner_path); + + /* + * We expect the materialize won't spill to disk (it could only do + * so if there were a whole lot of duplicate tuples, which is a case + * cost_mergejoin will avoid choosing anyway). Therefore + * cost_material's cost estimate is bogus and we should charge + * just cpu_tuple_cost per tuple. (Keep this estimate in sync with + * similar ones in cost_mergejoin and create_mergejoin_plan.) + */ + mpath->startup_cost = inner_path->startup_cost; + mpath->total_cost = inner_path->total_cost; + mpath->total_cost += cpu_tuple_cost * inner_path->parent->rows; + + inner_path = mpath; + } pathnode->jpath.path.pathtype = T_MergeJoin; pathnode->jpath.path.parent = joinrel;