1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-30 21:42:05 +03:00

Clone extended stats in CREATE TABLE (LIKE INCLUDING ALL)

The LIKE INCLUDING ALL clause to CREATE TABLE intuitively indicates
cloning of extended statistics on the source table, but it failed to do
so.  Patch it up so that it does.  Also include an INCLUDING STATISTICS
option to the LIKE clause, so that the behavior can be requested
individually, or excluded individually.

While at it, reorder the INCLUDING options, both in code and in docs, in
alphabetical order which makes more sense than feature-implementation
order that was previously used.

Backpatch this to Postgres 10, where extended statistics were
introduced, because this is seen as an oversight in a fresh feature
which is better to get consistent from the get-go instead of changing
only in pg11.

In pg11, comments on statistics objects are cloned too.  In pg10 they
are not, because I (Álvaro) was too coward to change the parse node as
required to support it.  Also, in pg10 I chose not to renumber the
parser symbols for the various INCLUDING options in LIKE, for the same
reason.  Any corresponding user-visible changes (docs) are backpatched,
though.

Reported-by: Stephen Froehlich
Author: David Rowley
Reviewed-by: Álvaro Herrera, Tomas Vondra
Discussion: https://postgr.es/m/CY1PR0601MB1927315B45667A1B679D0FD5E5EF0@CY1PR0601MB1927.namprd06.prod.outlook.com
This commit is contained in:
Alvaro Herrera
2018-03-05 19:37:19 -03:00
parent c2c537c56d
commit 5564c11815
11 changed files with 323 additions and 37 deletions

View File

@ -1805,7 +1805,8 @@ GetDefaultOpClass(Oid type_id, Oid am_id)
/*
* makeObjectName()
*
* Create a name for an implicitly created index, sequence, constraint, etc.
* Create a name for an implicitly created index, sequence, constraint,
* extended statistics, etc.
*
* The parameters are typically: the original table name, the original field
* name, and a "type" string (such as "seq" or "pkey"). The field name
@ -1981,6 +1982,8 @@ ChooseIndexName(const char *tabname, Oid namespaceId,
*
* We know that less than NAMEDATALEN characters will actually be used,
* so we can truncate the result once we've generated that many.
*
* XXX See also ChooseExtendedStatisticNameAddition.
*/
static char *
ChooseIndexNameAddition(List *colnames)

View File

@ -20,6 +20,7 @@
#include "catalog/namespace.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_statistic_ext.h"
#include "commands/comment.h"
#include "commands/defrem.h"
#include "miscadmin.h"
#include "statistics/statistics.h"
@ -31,6 +32,11 @@
#include "utils/typcache.h"
static char *ChooseExtendedStatisticName(const char *name1, const char *name2,
const char *label, Oid namespaceid);
static char *ChooseExtendedStatisticNameAddition(List *exprs);
/* qsort comparator for the attnums in CreateStatistics */
static int
compare_int16(const void *a, const void *b)
@ -51,7 +57,6 @@ CreateStatistics(CreateStatsStmt *stmt)
int16 attnums[STATS_MAX_DIMENSIONS];
int numcols = 0;
char *namestr;
NameData stxname;
Oid statoid;
Oid namespaceId;
Oid stxowner = GetUserId();
@ -75,31 +80,6 @@ CreateStatistics(CreateStatsStmt *stmt)
Assert(IsA(stmt, CreateStatsStmt));
/* resolve the pieces of the name (namespace etc.) */
namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr);
namestrcpy(&stxname, namestr);
/*
* Deal with the possibility that the statistics object already exists.
*/
if (SearchSysCacheExists2(STATEXTNAMENSP,
NameGetDatum(&stxname),
ObjectIdGetDatum(namespaceId)))
{
if (stmt->if_not_exists)
{
ereport(NOTICE,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("statistics object \"%s\" already exists, skipping",
namestr)));
return InvalidObjectAddress;
}
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("statistics object \"%s\" already exists", namestr)));
}
/*
* Examine the FROM clause. Currently, we only allow it to be a single
* simple table, but later we'll probably allow multiple tables and JOIN
@ -148,6 +128,45 @@ CreateStatistics(CreateStatsStmt *stmt)
Assert(rel);
relid = RelationGetRelid(rel);
/*
* If the node has a name, split it up and determine creation namespace.
* If not (a possibility not considered by the grammar, but one which can
* occur via the "CREATE TABLE ... (LIKE)" command), then we put the
* object in the same namespace as the relation, and cons up a name for it.
*/
if (stmt->defnames)
namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr);
else
{
namespaceId = RelationGetNamespace(rel);
namestr = ChooseExtendedStatisticName(RelationGetRelationName(rel),
ChooseExtendedStatisticNameAddition(stmt->exprs),
"stat",
namespaceId);
}
/*
* Deal with the possibility that the statistics object already exists.
*/
if (SearchSysCacheExists2(STATEXTNAMENSP,
CStringGetDatum(namestr),
ObjectIdGetDatum(namespaceId)))
{
if (stmt->if_not_exists)
{
ereport(NOTICE,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("statistics object \"%s\" already exists, skipping",
namestr)));
relation_close(rel, NoLock);
return InvalidObjectAddress;
}
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("statistics object \"%s\" already exists", namestr)));
}
/*
* Currently, we only allow simple column references in the expression
* list. That will change someday, and again the grammar already supports
@ -288,7 +307,7 @@ CreateStatistics(CreateStatsStmt *stmt)
memset(values, 0, sizeof(values));
memset(nulls, false, sizeof(nulls));
values[Anum_pg_statistic_ext_stxrelid - 1] = ObjectIdGetDatum(relid);
values[Anum_pg_statistic_ext_stxname - 1] = NameGetDatum(&stxname);
values[Anum_pg_statistic_ext_stxname - 1] = CStringGetDatum(namestr);
values[Anum_pg_statistic_ext_stxnamespace - 1] = ObjectIdGetDatum(namespaceId);
values[Anum_pg_statistic_ext_stxowner - 1] = ObjectIdGetDatum(stxowner);
values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys);
@ -340,6 +359,11 @@ CreateStatistics(CreateStatsStmt *stmt)
* STATISTICS, which is more work than it seems worth.
*/
/* Add any requested comment */
if (stmt->stxcomment != NULL)
CreateComments(statoid, StatisticExtRelationId, 0,
stmt->stxcomment);
/* Return stats object's address */
return myself;
}
@ -405,3 +429,94 @@ UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum,
* Future types of extended stats will likely require us to work harder.
*/
}
/*
* Select a nonconflicting name for a new statistics.
*
* name1, name2, and label are used the same way as for makeObjectName(),
* except that the label can't be NULL; digits will be appended to the label
* if needed to create a name that is unique within the specified namespace.
*
* Returns a palloc'd string.
*
* Note: it is theoretically possible to get a collision anyway, if someone
* else chooses the same name concurrently. This is fairly unlikely to be
* a problem in practice, especially if one is holding a share update
* exclusive lock on the relation identified by name1. However, if choosing
* multiple names within a single command, you'd better create the new object
* and do CommandCounterIncrement before choosing the next one!
*/
static char *
ChooseExtendedStatisticName(const char *name1, const char *name2,
const char *label, Oid namespaceid)
{
int pass = 0;
char *stxname = NULL;
char modlabel[NAMEDATALEN];
/* try the unmodified label first */
StrNCpy(modlabel, label, sizeof(modlabel));
for (;;)
{
Oid existingstats;
stxname = makeObjectName(name1, name2, modlabel);
existingstats = GetSysCacheOid2(STATEXTNAMENSP,
PointerGetDatum(stxname),
ObjectIdGetDatum(namespaceid));
if (!OidIsValid(existingstats))
break;
/* found a conflict, so try a new name component */
pfree(stxname);
snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
}
return stxname;
}
/*
* Generate "name2" for a new statistics given the list of column names for it
* This will be passed to ChooseExtendedStatisticName along with the parent
* table name and a suitable label.
*
* We know that less than NAMEDATALEN characters will actually be used,
* so we can truncate the result once we've generated that many.
*
* XXX see also ChooseIndexNameAddition.
*/
static char *
ChooseExtendedStatisticNameAddition(List *exprs)
{
char buf[NAMEDATALEN * 2];
int buflen = 0;
ListCell *lc;
buf[0] = '\0';
foreach(lc, exprs)
{
ColumnRef *cref = (ColumnRef *) lfirst(lc);
const char *name;
/* It should be one of these, but just skip if it happens not to be */
if (!IsA(cref, ColumnRef))
continue;
name = strVal((Value *) linitial(cref->fields));
if (buflen > 0)
buf[buflen++] = '_'; /* insert _ between names */
/*
* At this point we have buflen <= NAMEDATALEN. name should be less
* than NAMEDATALEN already, but use strlcpy for paranoia.
*/
strlcpy(buf + buflen, name, NAMEDATALEN);
buflen += strlen(buf + buflen);
if (buflen >= NAMEDATALEN)
break;
}
return pstrdup(buf);
}