From f75bf1877ac7ccb297c1a960534a0178b3ac8f96 Mon Sep 17 00:00:00 2001 From: "Thomas G. Lockhart" Date: Thu, 30 Mar 2000 22:22:41 +0000 Subject: [PATCH] Accumulated fixups. Add some chapters on new topics. Change to referencing OASIS/Docbook v3.1 rather than Davenport/Docbook v3.0 Grepped for and fixed apparent tag mangling from emacs "Normalize" operation. Should be the last of those. --- doc/src/sgml/admin.sgml | 10 +- doc/src/sgml/advanced.sgml | 4 +- doc/src/sgml/bug-reporting.sgml | 296 ------ doc/src/sgml/ecpg.sgml | 1727 ++++++++++++++++--------------- doc/src/sgml/func.sgml | 4 +- doc/src/sgml/indices.sgml | 4 +- doc/src/sgml/installation.sgml | 11 +- doc/src/sgml/intro-ag.sgml | 5 +- doc/src/sgml/intro-pg.sgml | 5 +- doc/src/sgml/intro.sgml | 2 +- doc/src/sgml/jdbc.sgml | 4 +- doc/src/sgml/libpgtcl.sgml | 10 + doc/src/sgml/plan.sgml | 263 +++++ doc/src/sgml/postgres.sgml | 172 +-- doc/src/sgml/programmer.sgml | 86 +- doc/src/sgml/refentry.sgml | 2 +- doc/src/sgml/reference.sgml | 11 +- doc/src/sgml/tutorial.sgml | 1 + doc/src/sgml/user.sgml | 56 +- 19 files changed, 1369 insertions(+), 1304 deletions(-) delete mode 100644 doc/src/sgml/bug-reporting.sgml create mode 100644 doc/src/sgml/plan.sgml diff --git a/doc/src/sgml/admin.sgml b/doc/src/sgml/admin.sgml index 4c33426a9d9..b69d9f759f1 100644 --- a/doc/src/sgml/admin.sgml +++ b/doc/src/sgml/admin.sgml @@ -1,11 +1,18 @@ - -Open cursor statement - - -An open cursor statement looks like: - -exec sql open cursor; - -and is ignore and not copied from the output. - - - + + + Open cursor statement + + + An open cursor statement looks like: + +exec sql open cursor; + + and is ignore and not copied from the output. 
+ + + - -Commit statement - - -A commit statement looks like - + + Commit statement + + + A commit statement looks like + exec sql commit; - -and is translated on the output to - + + and is translated on the output to + ECPGcommit(__LINE__); - - - - + + + + - -Rollback statement - - -A rollback statement looks like - + + Rollback statement + + + A rollback statement looks like + exec sql rollback; - -and is translated on the output to - + + and is translated on the output to + ECPGrollback(__LINE__); - - - - + + + + - - -Other statements - - -Other SQL statements are other statements that start with -exec sql and ends with ;. -Everything inbetween is treated -as an SQL statement and parsed for variable substitution. - + + + Other statements + + + Other SQL statements are other statements that start with + exec sql and end with ;. + Everything in between is treated + as an SQL statement and parsed for variable substitution. + - -Variable substitution occur when a symbol starts with a colon -(:). Then a variable with that name is looked for among -the variables that were previously declared within a declare section and -depending on the variable being for input or output the pointers to the -variables are written to the output to allow for access by the function. - + + Variable substitution occurs when a symbol starts with a colon + (:). Then a variable with that name is looked for among + the variables that were previously declared within a declare section and + depending on the variable being for input or output the pointers to the + variables are written to the output to allow for access by the function. + - -For every variable that is part of the SQL request -the function gets another ten arguments: + + For every variable that is part of the SQL request + the function gets another ten arguments: - -The type as a special symbol. -A pointer to the value or a pointer to the pointer. -The size of the variable if it is a char or varchar. 
-Number of elements in the array (for array fetches). -The offset to the next element in the array (for array fetches) -The type of the indicator variable as a special symbol. -A pointer to the value of the indicator variable or a pointer to the pointer of the indicator variable. -0. -Number of elements in the indicator array (for array fetches). -The offset to the next element in the indicator array (for array fetches) - - + + The type as a special symbol. + A pointer to the value or a pointer to the pointer. + The size of the variable if it is a char or varchar. + Number of elements in the array (for array fetches). + The offset to the next element in the array (for array fetches) + The type of the indicator variable as a special symbol. + A pointer to the value of the indicator variable or a pointer to the pointer of the indicator variable. + 0. + Number of elements in the indicator array (for array fetches). + The offset to the next element in the indicator array (for array fetches) + + - - - - - + + + + + - -A Complete Example + + A Complete Example - -Here is a complete example describing the output of the preprocessor of a -file foo.pgc: - + + Here is a complete example describing the output of the preprocessor of a + file foo.pgc: + exec sql begin declare section; int index; int result; exec sql end declare section; ... exec sql select res into :result from mytable where index = :index; - -is translated into: - + + is translated into: + /* Processed by ecpg (2.6.0) */ /* These two include files are added by the preprocessor */ #include <ecpgtype.h>; @@ -923,100 +935,117 @@ ECPGdo(__LINE__, NULL, "select res from mytable where index = ? ", ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EORT); #line 147 "foo.pgc" - -(the indentation in this manual is added for readability and not -something that the preprocessor can do.) - - + + (the indentation in this manual is added for readability and not + something that the preprocessor can do.) 
+ + - -The Library + + The Library - -The most important function in the library is the ECPGdo -function. It takes a variable amount of arguments. Hopefully we will not run -into machines with limits on the amount of variables that can be -accepted by a vararg function. This could easily add up to 50 or so -arguments. - + + The most important function in the library is the ECPGdo + function. It takes a variable number of arguments. Hopefully we will not run + into machines with limits on the amount of variables that can be + accepted by a vararg function. This could easily add up to 50 or so + arguments. + - -The arguments are: + + The arguments are: - - -A line number - - -This is a line number for the original line used in error messages only. - - - + + + A line number + + + This is a line number for the original line used in error messages only. + + + - -A string - - -This is the SQL request that is to be issued. -This request is modified -by the input variables, i.e. the variables that where not known at -compile time but are to be entered in the request. Where the variables -should go the string contains ;. - - - + + A string + + + This is the SQL request that is to be issued. + This request is modified + by the input variables, i.e. the variables that were not known at + compile time but are to be entered in the request. Where the variables + should go the string contains ;. + + + - -Input variables - - -As described in the section about the preprocessor every input variable -gets ten arguments. - - - + + Input variables + + + As described in the section about the preprocessor every input variable + gets ten arguments. + + + - -ECPGt_EOIT - - -An enum telling that there are no more input variables. - - - + + ECPGt_EOIT + + + An enum telling that there are no more input variables. + + + - -Output variables - - -As described in the section about the preprocessor every input variable -gets ten arguments. These variables are filled by the function. 
- - - + + Output variables + + + As described in the section about the preprocessor every output variable + gets ten arguments. These variables are filled by the function. + + + - -ECPGt_EORT - - -An enum telling that there are no more variables. - - - - - + + ECPGt_EORT + + + An enum telling that there are no more variables. + + + + + - -All the SQL statements are performed in one transaction -unless you issue a commit transaction. To get this auto-transaction going -the first statement or the first after statement after a commit or rollback -always begins a transaction. To disable this feature per default use the -'-t' option on the commandline - + + All the SQL statements are performed in one transaction + unless you issue a commit transaction. To get this auto-transaction going + the first statement or the first statement after a commit or rollback + always begins a transaction. To disable this feature per default use the + -t option on the commandline. + - -To be completed: entries describing the other entries. - - - - + + To be completed: entries describing the other entries. + + + + + + diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index efb7f15de05..ec84abeb92f 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1404,7 +1404,7 @@ Not defined by this name. 
Implements the intersection operator '#' diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index 32aa5e7892e..2ea3e90879d 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -394,7 +394,7 @@ CREATE MEMSTORE ON <table> COLUMNS <cols> diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index fb48215c85a..048cda62469 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -1,10 +1,17 @@ - diff --git a/doc/src/sgml/intro-ag.sgml b/doc/src/sgml/intro-ag.sgml index 83d18555c34..f7ad4c78186 100644 --- a/doc/src/sgml/intro-ag.sgml +++ b/doc/src/sgml/intro-ag.sgml @@ -19,6 +19,7 @@ &info; &notation; + &problems; &y2k; &legal; @@ -26,7 +27,7 @@ diff --git a/doc/src/sgml/intro-pg.sgml b/doc/src/sgml/intro-pg.sgml index 093ab42c4be..a8cb214e4ec 100644 --- a/doc/src/sgml/intro-pg.sgml +++ b/doc/src/sgml/intro-pg.sgml @@ -39,6 +39,7 @@ &info; &notation; + &problems; &y2k; &legal; @@ -46,7 +47,7 @@ diff --git a/doc/src/sgml/intro.sgml b/doc/src/sgml/intro.sgml index d5eea403dcb..eba5ac85815 100644 --- a/doc/src/sgml/intro.sgml +++ b/doc/src/sgml/intro.sgml @@ -73,7 +73,7 @@ &about; &info; &notation; - &bug-reporting; + &problems; &y2k; &legal; diff --git a/doc/src/sgml/jdbc.sgml b/doc/src/sgml/jdbc.sgml index 45a11e0dc80..0e2fc75b594 100644 --- a/doc/src/sgml/jdbc.sgml +++ b/doc/src/sgml/jdbc.sgml @@ -233,13 +233,13 @@ Class.forName("postgresql.Driver"); - jdbc:postgresql://>hos>/database + jdbc:postgresql://host/database - jdbc:postgresql://>hos>">poe>/database + jdbc:postgresql://host:port/database diff --git a/doc/src/sgml/libpgtcl.sgml b/doc/src/sgml/libpgtcl.sgml index 682f7ad1824..724bbff5973 100644 --- a/doc/src/sgml/libpgtcl.sgml +++ b/doc/src/sgml/libpgtcl.sgml @@ -671,6 +671,16 @@ the number of attributes in each tuple. +-list VarName + + + +assign the results to a list of lists. 
+ + + + + -assign arrayName diff --git a/doc/src/sgml/plan.sgml b/doc/src/sgml/plan.sgml new file mode 100644 index 00000000000..ef30a1a2a7f --- /dev/null +++ b/doc/src/sgml/plan.sgml @@ -0,0 +1,263 @@ + + Understanding Performance + + + Query performance can be affected by many things. Some of these can + be manipulated by the user, while others are fundamental to the underlying + design of the system. + + + + Some performance issues, such as index creation and bulk data + loading, are covered elsewhere. This chapter will discuss the + EXPLAIN command, and will show how the details + of a query can affect the query plan, and hence overall + performance. + + + + Using <command>EXPLAIN</command> + + + Author + + Written by Tom Lane, from e-mail dated 2000-03-27. + + + + + Plan-reading is an art that deserves a tutorial, and I haven't + had time to write one. Here is some quick & dirty explanation. + + + + The numbers that are currently quoted by EXPLAIN are: + + + + + Estimated startup cost (time expended before output scan can start, + eg, time to do the sorting in a SORT node). + + + + + + Estimated total cost (if all tuples are retrieved, which they may not + be --- LIMIT will stop short of paying the total cost, for + example). + + + + + + Estimated number of rows output by this plan node. + + + + + + Estimated average width (in bytes) of rows output by this plan + node. + + + + + + + The costs are measured in units of disk page fetches. (There are some + fairly bogus fudge-factors for converting CPU effort estimates into + disk-fetch units; see the SET ref page if you want to play with these.) + It's important to note that the cost of an upper-level node includes + the cost of all its child nodes. It's also important to realize that + the cost only reflects things that the planner/optimizer cares about. 
+ In particular, the cost does not consider the time spent transmitting + result tuples to the frontend --- which could be a pretty dominant + factor in the true elapsed time, but the planner ignores it because + it cannot change it by altering the plan. (Every correct plan will + output the same tuple set, we trust.) + + + + Rows output is a little tricky because it is *not* the number of rows + processed/scanned by the query --- it is usually less, reflecting the + estimated selectivity of any WHERE-clause constraints that are being + applied at this node. + + + + Average width is pretty bogus because the thing really doesn't have + any idea of the average length of variable-length columns. I'm thinking + about improving that in the future, but it may not be worth the trouble, + because the width isn't used for very much. + + + + Here are some examples (using the regress test database after a + vacuum analyze, and current sources): + + +regression=# explain select * from tenk1; +NOTICE: QUERY PLAN: + +Seq Scan on tenk1 (cost=0.00..333.00 rows=10000 width=148) + + + + + About as straightforward as it gets. If you do + + +select * from pg_class where relname = 'tenk1'; + + + you'll find out that tenk1 has 233 disk + pages and 10000 tuples. So the cost is estimated at 233 block + reads, defined as 1.0 apiece, plus 10000 * cpu_tuple_cost which is + currently 0.01 (try show cpu_tuple_cost). + + + + Now let's modify the query to add a qualification clause: + + +regression=# explain select * from tenk1 where unique1 < 1000; +NOTICE: QUERY PLAN: + +Seq Scan on tenk1 (cost=0.00..358.00 rows=1000 width=148) + + + Estimated output rows has gone down because of the WHERE clause. + (The uncannily accurate estimate is just because tenk1 is a particularly + simple case --- the unique1 column has 10000 distinct values ranging + from 0 to 9999, so the estimator's linear interpolation between min and + max column values is dead-on.) 
However, the scan will still have to + visit all 10000 rows, so the cost hasn't decreased; in fact it has gone + up a bit to reflect the extra CPU time spent checking the WHERE + condition. + + + + Modify the query to restrict the qualification even more: + + +regression=# explain select * from tenk1 where unique1 < 100; +NOTICE: QUERY PLAN: + +Index Scan using tenk1_unique1 on tenk1 (cost=0.00..89.35 rows=100 width=148) + + + and you will see that if we make the WHERE condition selective + enough, the planner will + eventually decide that an indexscan is cheaper than a sequential scan. + This plan will only have to visit 100 tuples because of the index, + so it wins despite the fact that each individual fetch is expensive. + + + + Add another condition to the qualification: + + +regression=# explain select * from tenk1 where unique1 < 100 and +regression-# stringu1 = 'xxx'; +NOTICE: QUERY PLAN: + +Index Scan using tenk1_unique1 on tenk1 (cost=0.00..89.60 rows=1 width=148) + + + The added clause "stringu1 = 'xxx'" reduces the output-rows estimate, + but not the cost because we still have to visit the same set of tuples. + + + + Let's try joining two tables, using the fields we have been discussing: + + +regression=# explain select * from tenk1 t1, tenk2 t2 where t1.unique1 < 100 +regression-# and t1.unique2 = t2.unique2; +NOTICE: QUERY PLAN: + +Nested Loop (cost=0.00..144.07 rows=100 width=296) + -> Index Scan using tenk1_unique1 on tenk1 t1 + (cost=0.00..89.35 rows=100 width=148) + -> Index Scan using tenk2_unique2 on tenk2 t2 + (cost=0.00..0.53 rows=1 width=148) + + + + + In this nested-loop join, the outer scan is the same indexscan we had + in the example before last, and the cost and row count are the same + because we are applying the "unique1 < 100" WHERE clause at this node. + The "t1.unique2 = t2.unique2" clause isn't relevant yet, so it doesn't + affect the row count. 
For the inner scan, we assume that the current + outer-scan tuple's unique2 value is plugged into the inner indexscan + to produce an indexqual like + "t2.unique2 = constant". So we get the + same inner-scan plan and costs that we'd get from, say, "explain select + * from tenk2 where unique2 = 42". The loop node's costs are then set + on the basis of the outer scan's cost, plus one repetition of the + inner scan for each outer tuple (100 * 0.53, here), plus a little CPU + time for join processing. + + + + In this example the loop's output row count is the same as the product + of the two scans' row counts, but that's not true in general, because + in general you can have WHERE clauses that mention both relations and + so can only be applied at the join point, not to either input scan. + For example, if we added "WHERE ... AND t1.hundred < t2.hundred", + that'd decrease the output row count of the join node, but not change + either input scan. + + + + We can look at variant plans by forcing the planner to disregard + whatever strategy it thought was the winner (a pretty crude tool, + but it's what we've got at the moment): + + +regression=# set enable_nestloop = 'off'; +SET VARIABLE +regression=# explain select * from tenk1 t1, tenk2 t2 where t1.unique1 < 100 +regression-# and t1.unique2 = t2.unique2; +NOTICE: QUERY PLAN: + +Hash Join (cost=89.60..574.10 rows=100 width=296) + -> Seq Scan on tenk2 t2 + (cost=0.00..333.00 rows=10000 width=148) + -> Hash (cost=89.35..89.35 rows=100 width=148) + -> Index Scan using tenk1_unique1 on tenk1 t1 + (cost=0.00..89.35 rows=100 width=148) + + + This plan proposes to extract the 100 interesting rows of tenk1 + using ye same olde indexscan, stash them into an in-memory hash table, + and then do a sequential scan of tenk2, probing into the hash table + for possible matches of "t1.unique2 = t2.unique2" at each tenk2 tuple. 
+ The cost to read tenk1 and set up the hash table is entirely startup + cost for the hash join, since we won't get any tuples out until we can + start reading tenk2. The total time estimate for the join also + includes a pretty hefty charge for CPU time to probe the hash table + 10000 times. Note, however, that we are NOT charging 10000 times 89.35; + the hash table setup is only done once in this plan type. + + + + + diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index e94e561ee8e..f189b9b010e 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -1,98 +1,104 @@ - - - - - - + + + + + + + - - - - - + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - + %allfiles; - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + + ]> @@ -181,10 +187,15 @@ Your name here... &indices; &array; &inherit; + &plsql; + &pltcl; + &plperl; &mvcc; &environ; &manage; &storage; + &plan; + &populate; &commands; @@ -237,6 +248,7 @@ Your name here... &xaggr; &rules; &xindex; + &indexcost; &gist; &dfunc; &trigger; diff --git a/doc/src/sgml/programmer.sgml b/doc/src/sgml/programmer.sgml index c01041fa4bf..b559f94ece4 100644 --- a/doc/src/sgml/programmer.sgml +++ b/doc/src/sgml/programmer.sgml @@ -1,9 +1,16 @@ - - - - - - - - - - - - - + + + + + + + + + + + + + ]> @@ -169,8 +179,10 @@ Your name here... 
&xaggr; &rules; &xindex; + &indexcost; &gist; &xplang; + &plperl; &dfunc; diff --git a/doc/src/sgml/refentry.sgml b/doc/src/sgml/refentry.sgml index 720d8bd1b7f..64d102c1308 100644 --- a/doc/src/sgml/refentry.sgml +++ b/doc/src/sgml/refentry.sgml @@ -1 +1 @@ - + diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml index 5dcda9acacd..7dd2eeaafb1 100644 --- a/doc/src/sgml/reference.sgml +++ b/doc/src/sgml/reference.sgml @@ -1,10 +1,17 @@ - diff --git a/doc/src/sgml/tutorial.sgml b/doc/src/sgml/tutorial.sgml index 0f1d93e45e3..02a210ac0ef 100644 --- a/doc/src/sgml/tutorial.sgml +++ b/doc/src/sgml/tutorial.sgml @@ -11,6 +11,7 @@ + diff --git a/doc/src/sgml/user.sgml b/doc/src/sgml/user.sgml index 3309737fd73..d770f9cc61f 100644 --- a/doc/src/sgml/user.sgml +++ b/doc/src/sgml/user.sgml @@ -1,29 +1,34 @@ - - - - - - + + + + + + + - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + @@ -108,10 +113,15 @@ Your name here... &indices; &array; &inherit; + &plsql; + &pltcl; + &plperl; &mvcc; &environ; &manage; &storage; + &plan; + &populate; &commands;