/*------------------------------------------------------------------------- * * plancat.c * routines for accessing the system catalogs * * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.105 2005/04/14 20:03:24 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include #include "access/genam.h" #include "access/heapam.h" #include "catalog/pg_amop.h" #include "catalog/pg_inherits.h" #include "catalog/pg_index.h" #include "nodes/makefuncs.h" #include "optimizer/clauses.h" #include "optimizer/plancat.h" #include "optimizer/tlist.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" #include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/relcache.h" #include "utils/syscache.h" #include "catalog/catalog.h" #include "miscadmin.h" static void estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples); /* * get_relation_info - * Retrieves catalog information for a given relation. * * Given the Oid of the relation, return the following info into fields * of the RelOptInfo struct: * * min_attr lowest valid AttrNumber * max_attr highest valid AttrNumber * indexlist list of IndexOptInfos for relation's indexes * pages number of pages * tuples number of tuples * * Also, initialize the attr_needed[] and attr_widths[] arrays. In most * cases these are left as zeroes, but sometimes we need to compute attr * widths here, and we may as well cache the results for costsize.c. */ void get_relation_info(Oid relationObjectId, RelOptInfo *rel) { Index varno = rel->relid; Relation relation; bool hasindex; List *indexinfos = NIL; relation = heap_open(relationObjectId, AccessShareLock); rel->min_attr = FirstLowInvalidHeapAttributeNumber + 1; rel->max_attr = RelationGetNumberOfAttributes(relation); Assert(rel->max_attr >= rel->min_attr); rel->attr_needed = (Relids *) palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(Relids)); rel->attr_widths = (int32 *) palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(int32)); /* * Estimate relation size. */ estimate_rel_size(relation, rel->attr_widths - rel->min_attr, &rel->pages, &rel->tuples); /* * Make list of indexes. Ignore indexes on system catalogs if told * to. */ if (IsIgnoringSystemIndexes() && IsSystemClass(relation->rd_rel)) hasindex = false; else hasindex = relation->rd_rel->relhasindex; if (hasindex) { List *indexoidlist; ListCell *l; indexoidlist = RelationGetIndexList(relation); foreach(l, indexoidlist) { Oid indexoid = lfirst_oid(l); Relation indexRelation; Form_pg_index index; IndexOptInfo *info; int ncolumns; int i; int16 amorderstrategy; /* * Extract info from the relation descriptor for the index. * * Note that we take no lock on the index; we assume our lock on * the parent table will protect the index's schema information. * When and if the executor actually uses the index, it will take * a lock as needed to protect the access to the index contents. */ indexRelation = index_open(indexoid); index = indexRelation->rd_index; info = makeNode(IndexOptInfo); info->indexoid = index->indexrelid; info->rel = rel; info->ncolumns = ncolumns = index->indnatts; /* * Need to make classlist and ordering arrays large enough to * put a terminating 0 at the end of each one. */ info->indexkeys = (int *) palloc(sizeof(int) * ncolumns); info->classlist = (Oid *) palloc0(sizeof(Oid) * (ncolumns + 1)); info->ordering = (Oid *) palloc0(sizeof(Oid) * (ncolumns + 1)); for (i = 0; i < ncolumns; i++) { info->classlist[i] = indexRelation->rd_indclass->values[i]; info->indexkeys[i] = index->indkey.values[i]; } info->relam = indexRelation->rd_rel->relam; info->amcostestimate = index_cost_estimator(indexRelation); /* * Fetch the ordering operators associated with the index, if * any. */ amorderstrategy = indexRelation->rd_am->amorderstrategy; if (amorderstrategy != 0) { int oprindex = amorderstrategy - 1; for (i = 0; i < ncolumns; i++) { info->ordering[i] = indexRelation->rd_operator[oprindex]; oprindex += indexRelation->rd_am->amstrategies; } } /* * Fetch the index expressions and predicate, if any. We must * modify the copies we obtain from the relcache to have the * correct varno for the parent relation, so that they match * up correctly against qual clauses. */ info->indexprs = RelationGetIndexExpressions(indexRelation); info->indpred = RelationGetIndexPredicate(indexRelation); if (info->indexprs && varno != 1) ChangeVarNodes((Node *) info->indexprs, 1, varno, 0); if (info->indpred && varno != 1) ChangeVarNodes((Node *) info->indpred, 1, varno, 0); info->predOK = false; /* set later in indxpath.c */ info->unique = index->indisunique; /* * Estimate the index size. If it's not a partial index, we * lock the number-of-tuples estimate to equal the parent table; * if it is partial then we have to use the same methods as we * would for a table, except we can be sure that the index is * not larger than the table. */ if (info->indpred == NIL) { info->pages = RelationGetNumberOfBlocks(indexRelation); info->tuples = rel->tuples; } else { estimate_rel_size(indexRelation, NULL, &info->pages, &info->tuples); if (info->tuples > rel->tuples) info->tuples = rel->tuples; } /* initialize cached join info to empty */ info->outer_relids = NULL; info->inner_paths = NIL; index_close(indexRelation); indexinfos = lcons(info, indexinfos); } list_free(indexoidlist); } rel->indexlist = indexinfos; /* XXX keep the lock here? */ heap_close(relation, AccessShareLock); } /* * estimate_rel_size - estimate # pages and # tuples in a table or index * * If attr_widths isn't NULL, it points to the zero-index entry of the * relation's attr_width[] cache; we fill this in if we have need to compute * the attribute widths for estimation purposes. */ static void estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples) { BlockNumber curpages; BlockNumber relpages; double reltuples; double density; switch (rel->rd_rel->relkind) { case RELKIND_RELATION: case RELKIND_INDEX: case RELKIND_TOASTVALUE: /* it has storage, ok to call the smgr */ curpages = RelationGetNumberOfBlocks(rel); /* * HACK: if the relation has never yet been vacuumed, use a * minimum estimate of 10 pages. This emulates a desirable * aspect of pre-8.0 behavior, which is that we wouldn't assume * a newly created relation is really small, which saves us from * making really bad plans during initial data loading. (The * plans are not wrong when they are made, but if they are cached * and used again after the table has grown a lot, they are bad.) * It would be better to force replanning if the table size has * changed a lot since the plan was made ... but we don't * currently have any infrastructure for redoing cached plans at * all, so we have to kluge things here instead. * * We approximate "never vacuumed" by "has relpages = 0", which * means this will also fire on genuinely empty relations. Not * great, but fortunately that's a seldom-seen case in the real * world, and it shouldn't degrade the quality of the plan too * much anyway to err in this direction. */ if (curpages < 10 && rel->rd_rel->relpages == 0) curpages = 10; /* report estimated # pages */ *pages = curpages; /* quick exit if rel is clearly empty */ if (curpages == 0) { *tuples = 0; break; } /* coerce values in pg_class to more desirable types */ relpages = (BlockNumber) rel->rd_rel->relpages; reltuples = (double) rel->rd_rel->reltuples; /* * If it's an index, discount the metapage. This is a kluge * because it assumes more than it ought to about index contents; * it's reasonably OK for btrees but a bit suspect otherwise. */ if (rel->rd_rel->relkind == RELKIND_INDEX && relpages > 0) { curpages--; relpages--; } /* estimate number of tuples from previous tuple density */ if (relpages > 0) density = reltuples / (double) relpages; else { /* * When we have no data because the relation was truncated, * estimate tuple width from attribute datatypes. We assume * here that the pages are completely full, which is OK for * tables (since they've presumably not been VACUUMed yet) * but is probably an overestimate for indexes. Fortunately * get_relation_info() can clamp the overestimate to the * parent table's size. * * Note: this code intentionally disregards alignment * considerations, because (a) that would be gilding the * lily considering how crude the estimate is, and (b) * it creates platform dependencies in the default plans * which are kind of a headache for regression testing. */ int32 tuple_width = 0; int i; for (i = 1; i <= RelationGetNumberOfAttributes(rel); i++) { Form_pg_attribute att = rel->rd_att->attrs[i - 1]; int32 item_width; if (att->attisdropped) continue; /* This should match set_rel_width() in costsize.c */ item_width = get_attavgwidth(RelationGetRelid(rel), i); if (item_width <= 0) { item_width = get_typavgwidth(att->atttypid, att->atttypmod); Assert(item_width > 0); } if (attr_widths != NULL) attr_widths[i] = item_width; tuple_width += item_width; } tuple_width += sizeof(HeapTupleHeaderData); tuple_width += sizeof(ItemPointerData); /* note: integer division is intentional here */ density = (BLCKSZ - sizeof(PageHeaderData)) / tuple_width; } *tuples = rint(density * (double) curpages); break; case RELKIND_SEQUENCE: /* Sequences always have a known size */ *pages = 1; *tuples = 1; break; default: /* else it has no disk storage; probably shouldn't get here? */ *pages = 0; *tuples = 0; break; } } /* * build_physical_tlist * * Build a targetlist consisting of exactly the relation's user attributes, * in order. The executor can special-case such tlists to avoid a projection * step at runtime, so we use such tlists preferentially for scan nodes. * * Exception: if there are any dropped columns, we punt and return NIL. * Ideally we would like to handle the dropped-column case too. However this * creates problems for ExecTypeFromTL, which may be asked to build a tupdesc * for a tlist that includes vars of no-longer-existent types. In theory we * could dig out the required info from the pg_attribute entries of the * relation, but that data is not readily available to ExecTypeFromTL. * For now, we don't apply the physical-tlist optimization when there are * dropped cols. */ List * build_physical_tlist(Query *root, RelOptInfo *rel) { Index varno = rel->relid; RangeTblEntry *rte = rt_fetch(varno, root->rtable); Relation relation; List *tlist = NIL; int attrno, numattrs; Assert(rte->rtekind == RTE_RELATION); relation = heap_open(rte->relid, AccessShareLock); numattrs = RelationGetNumberOfAttributes(relation); for (attrno = 1; attrno <= numattrs; attrno++) { Form_pg_attribute att_tup = relation->rd_att->attrs[attrno - 1]; Var *var; if (att_tup->attisdropped) { /* found a dropped col, so punt */ tlist = NIL; break; } var = makeVar(varno, attrno, att_tup->atttypid, att_tup->atttypmod, 0); tlist = lappend(tlist, makeTargetEntry((Expr *) var, attrno, NULL, false)); } heap_close(relation, AccessShareLock); return tlist; } /* * restriction_selectivity * * Returns the selectivity of a specified restriction operator clause. * This code executes registered procedures stored in the * operator relation, by calling the function manager. * * See clause_selectivity() for the meaning of the additional parameters. */ Selectivity restriction_selectivity(Query *root, Oid operator, List *args, int varRelid) { RegProcedure oprrest = get_oprrest(operator); float8 result; /* * if the oprrest procedure is missing for whatever reason, use a * selectivity of 0.5 */ if (!oprrest) return (Selectivity) 0.5; result = DatumGetFloat8(OidFunctionCall4(oprrest, PointerGetDatum(root), ObjectIdGetDatum(operator), PointerGetDatum(args), Int32GetDatum(varRelid))); if (result < 0.0 || result > 1.0) elog(ERROR, "invalid restriction selectivity: %f", result); return (Selectivity) result; } /* * join_selectivity * * Returns the selectivity of a specified join operator clause. * This code executes registered procedures stored in the * operator relation, by calling the function manager. */ Selectivity join_selectivity(Query *root, Oid operator, List *args, JoinType jointype) { RegProcedure oprjoin = get_oprjoin(operator); float8 result; /* * if the oprjoin procedure is missing for whatever reason, use a * selectivity of 0.5 */ if (!oprjoin) return (Selectivity) 0.5; result = DatumGetFloat8(OidFunctionCall4(oprjoin, PointerGetDatum(root), ObjectIdGetDatum(operator), PointerGetDatum(args), Int16GetDatum(jointype))); if (result < 0.0 || result > 1.0) elog(ERROR, "invalid join selectivity: %f", result); return (Selectivity) result; } /* * find_inheritance_children * * Returns a list containing the OIDs of all relations which * inherit *directly* from the relation with OID 'inhparent'. * * XXX might be a good idea to create an index on pg_inherits' inhparent * field, so that we can use an indexscan instead of sequential scan here. * However, in typical databases pg_inherits won't have enough entries to * justify an indexscan... */ List * find_inheritance_children(Oid inhparent) { List *list = NIL; Relation relation; HeapScanDesc scan; HeapTuple inheritsTuple; Oid inhrelid; ScanKeyData key[1]; /* * Can skip the scan if pg_class shows the relation has never had a * subclass. */ if (!has_subclass(inhparent)) return NIL; ScanKeyInit(&key[0], Anum_pg_inherits_inhparent, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(inhparent)); relation = heap_open(InheritsRelationId, AccessShareLock); scan = heap_beginscan(relation, SnapshotNow, 1, key); while ((inheritsTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { inhrelid = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhrelid; list = lappend_oid(list, inhrelid); } heap_endscan(scan); heap_close(relation, AccessShareLock); return list; } /* * has_subclass * * In the current implementation, has_subclass returns whether a * particular class *might* have a subclass. It will not return the * correct result if a class had a subclass which was later dropped. * This is because relhassubclass in pg_class is not updated when a * subclass is dropped, primarily because of concurrency concerns. * * Currently has_subclass is only used as an efficiency hack to skip * unnecessary inheritance searches, so this is OK. */ bool has_subclass(Oid relationId) { HeapTuple tuple; bool result; tuple = SearchSysCache(RELOID, ObjectIdGetDatum(relationId), 0, 0, 0); if (!HeapTupleIsValid(tuple)) elog(ERROR, "cache lookup failed for relation %u", relationId); result = ((Form_pg_class) GETSTRUCT(tuple))->relhassubclass; ReleaseSysCache(tuple); return result; } /* * has_unique_index * * Detect whether there is a unique index on the specified attribute * of the specified relation, thus allowing us to conclude that all * the (non-null) values of the attribute are distinct. */ bool has_unique_index(RelOptInfo *rel, AttrNumber attno) { ListCell *ilist; foreach(ilist, rel->indexlist) { IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); /* * Note: ignore partial indexes, since they don't allow us to * conclude that all attr values are distinct. We don't take any * interest in expressional indexes either. Also, a multicolumn * unique index doesn't allow us to conclude that just the * specified attr is unique. */ if (index->unique && index->ncolumns == 1 && index->indexkeys[0] == attno && index->indpred == NIL) return true; } return false; }