Clone extended stats in CREATE TABLE (LIKE INCLUDING ALL)

The LIKE INCLUDING ALL clause to CREATE TABLE intuitively indicates cloning of extended statistics on the source table, but it failed to do so. Patch it up so that it does. Also add an INCLUDING STATISTICS option to the LIKE clause, so that the behavior can be requested individually, or excluded individually. While at it, reorder the INCLUDING options, both in code and in docs, in alphabetical order which makes more sense than feature-implementation order that was previously used. Backpatch this to Postgres 10, where extended statistics were introduced, because this is seen as an oversight in a fresh feature which is better to get consistent from the get-go instead of changing only in pg11. In pg11, comments on statistics objects are cloned too. In pg10 they are not, because I (Álvaro) was too cowardly to change the parse node as required to support it. Also, in pg10 I chose not to renumber the parser symbols for the various INCLUDING options in LIKE, for the same reason. Any corresponding user-visible changes (docs) are backpatched, though. Reported-by: Stephen Froehlich Author: David Rowley Reviewed-by: Álvaro Herrera, Tomas Vondra Discussion: https://postgr.es/m/CY1PR0601MB1927315B45667A1B679D0FD5E5EF0@CY1PR0601MB1927.namprd06.prod.outlook.com
2025-11-06 07:49:08 +03:00 · 2018-03-05 19:37:19 -03:00
parent c2c537c56d
commit 5564c11815
11 changed files with 323 additions and 37 deletions
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1805,7 +1805,8 @@ GetDefaultOpClass(Oid type_id, Oid am_id)
 /*
 *	makeObjectName()
 *
- *	Create a name for an implicitly created index, sequence, constraint, etc.
+ *	Create a name for an implicitly created index, sequence, constraint,
+ *	extended statistics, etc.
 *
 *	The parameters are typically: the original table name, the original field
 *	name, and a "type" string (such as "seq" or "pkey").    The field name
@@ -1981,6 +1982,8 @@ ChooseIndexName(const char *tabname, Oid namespaceId,
 *
 * We know that less than NAMEDATALEN characters will actually be used,
 * so we can truncate the result once we've generated that many.
+ *
+ * XXX See also ChooseExtendedStatisticNameAddition.
 */
 static char *
 ChooseIndexNameAddition(List *colnames)
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -20,6 +20,7 @@
 #include "catalog/namespace.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_statistic_ext.h"
+#include "commands/comment.h"
 #include "commands/defrem.h"
 #include "miscadmin.h"
 #include "statistics/statistics.h"
@@ -31,6 +32,11 @@
 #include "utils/typcache.h"


+static char *ChooseExtendedStatisticName(const char *name1, const char *name2,
+							const char *label, Oid namespaceid);
+static char *ChooseExtendedStatisticNameAddition(List *exprs);
+
+
 /* qsort comparator for the attnums in CreateStatistics */
 static int
 compare_int16(const void *a, const void *b)
@@ -51,7 +57,6 @@ CreateStatistics(CreateStatsStmt *stmt)
 	int16		attnums[STATS_MAX_DIMENSIONS];
 	int			numcols = 0;
 	char	   *namestr;
-	NameData	stxname;
 	Oid			statoid;
 	Oid			namespaceId;
 	Oid			stxowner = GetUserId();
@@ -75,31 +80,6 @@ CreateStatistics(CreateStatsStmt *stmt)

 	Assert(IsA(stmt, CreateStatsStmt));

-	/* resolve the pieces of the name (namespace etc.) */
-	namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr);
-	namestrcpy(&stxname, namestr);
-
-	/*
-	 * Deal with the possibility that the statistics object already exists.
-	 */
-	if (SearchSysCacheExists2(STATEXTNAMENSP,
-							  NameGetDatum(&stxname),
-							  ObjectIdGetDatum(namespaceId)))
-	{
-		if (stmt->if_not_exists)
-		{
-			ereport(NOTICE,
-					(errcode(ERRCODE_DUPLICATE_OBJECT),
-					 errmsg("statistics object \"%s\" already exists, skipping",
-							namestr)));
-			return InvalidObjectAddress;
-		}
-
-		ereport(ERROR,
-				(errcode(ERRCODE_DUPLICATE_OBJECT),
-				 errmsg("statistics object \"%s\" already exists", namestr)));
-	}
-
 	/*
 	 * Examine the FROM clause.  Currently, we only allow it to be a single
 	 * simple table, but later we'll probably allow multiple tables and JOIN
@@ -148,6 +128,45 @@ CreateStatistics(CreateStatsStmt *stmt)
 	Assert(rel);
 	relid = RelationGetRelid(rel);

+	/*
+	 * If the node has a name, split it up and determine creation namespace.
+	 * If not (a possibility not considered by the grammar, but one which can
+	 * occur via the "CREATE TABLE ... (LIKE)" command), then we put the
+	 * object in the same namespace as the relation, and cons up a name for it.
+	 */
+	if (stmt->defnames)
+		namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr);
+	else
+	{
+		namespaceId = RelationGetNamespace(rel);
+		namestr = ChooseExtendedStatisticName(RelationGetRelationName(rel),
+											  ChooseExtendedStatisticNameAddition(stmt->exprs),
+											  "stat",
+											  namespaceId);
+	}
+
+	/*
+	 * Deal with the possibility that the statistics object already exists.
+	 */
+	if (SearchSysCacheExists2(STATEXTNAMENSP,
+							  CStringGetDatum(namestr),
+							  ObjectIdGetDatum(namespaceId)))
+	{
+		if (stmt->if_not_exists)
+		{
+			ereport(NOTICE,
+					(errcode(ERRCODE_DUPLICATE_OBJECT),
+					 errmsg("statistics object \"%s\" already exists, skipping",
+							namestr)));
+			relation_close(rel, NoLock);
+			return InvalidObjectAddress;
+		}
+
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_OBJECT),
+				 errmsg("statistics object \"%s\" already exists", namestr)));
+	}
+
 	/*
 	 * Currently, we only allow simple column references in the expression
 	 * list.  That will change someday, and again the grammar already supports
@@ -288,7 +307,7 @@ CreateStatistics(CreateStatsStmt *stmt)
 	memset(values, 0, sizeof(values));
 	memset(nulls, false, sizeof(nulls));
 	values[Anum_pg_statistic_ext_stxrelid - 1] = ObjectIdGetDatum(relid);
-	values[Anum_pg_statistic_ext_stxname - 1] = NameGetDatum(&stxname);
+	values[Anum_pg_statistic_ext_stxname - 1] = CStringGetDatum(namestr);
 	values[Anum_pg_statistic_ext_stxnamespace - 1] = ObjectIdGetDatum(namespaceId);
 	values[Anum_pg_statistic_ext_stxowner - 1] = ObjectIdGetDatum(stxowner);
 	values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys);
@@ -340,6 +359,11 @@ CreateStatistics(CreateStatsStmt *stmt)
 	 * STATISTICS, which is more work than it seems worth.
 	 */

+	/* Add any requested comment */
+	if (stmt->stxcomment != NULL)
+		CreateComments(statoid, StatisticExtRelationId, 0,
+					   stmt->stxcomment);
+
 	/* Return stats object's address */
 	return myself;
 }
@@ -405,3 +429,94 @@ UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum,
 	 * Future types of extended stats will likely require us to work harder.
 	 */
 }
+
+/*
+ * Select a nonconflicting name for a new statistics object.
+ *
+ * name1, name2, and label are used the same way as for makeObjectName(),
+ * except that the label can't be NULL; digits will be appended to the label
+ * if needed to create a name that is unique within the specified namespace.
+ *
+ * Returns a palloc'd string.
+ *
+ * Note: it is theoretically possible to get a collision anyway, if someone
+ * else chooses the same name concurrently.  This is fairly unlikely to be
+ * a problem in practice, especially if one is holding a share update
+ * exclusive lock on the relation identified by name1.  However, if choosing
+ * multiple names within a single command, you'd better create the new object
+ * and do CommandCounterIncrement before choosing the next one!
+ */
+static char *
+ChooseExtendedStatisticName(const char *name1, const char *name2,
+							const char *label, Oid namespaceid)
+{
+	int			pass = 0;	/* numeric suffix last appended to label */
+	char	   *stxname = NULL;
+	char		modlabel[NAMEDATALEN];	/* label, plus digits after a conflict */
+
+	/* try the unmodified label first */
+	StrNCpy(modlabel, label, sizeof(modlabel));
+
+	for (;;)
+	{
+		Oid		existingstats;
+
+		stxname = makeObjectName(name1, name2, modlabel);
+
+		existingstats = GetSysCacheOid2(STATEXTNAMENSP,
+										PointerGetDatum(stxname),
+										ObjectIdGetDatum(namespaceid));
+		if (!OidIsValid(existingstats))
+			break;	/* no existing entry for this name and namespace */
+
+		/* found a conflict, so retry with label1, label2, ... appended */
+		pfree(stxname);
+		snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
+	}
+
+	return stxname;
+}
+
+/*
+ * Generate "name2" for a new statistics object from the list of column
+ * references given for it.  This will be passed to
+ * ChooseExtendedStatisticName along with the parent table name and a label.
+ *
+ * We know that less than NAMEDATALEN characters will actually be used,
+ * so we can truncate the result once we've generated that many.
+ *
+ * XXX see also ChooseIndexNameAddition.
+ */
+static char *
+ChooseExtendedStatisticNameAddition(List *exprs)
+{
+	char		buf[NAMEDATALEN * 2];	/* fits one more name past the cutoff */
+	int			buflen = 0;
+	ListCell   *lc;
+
+	buf[0] = '\0';
+	foreach(lc, exprs)
+	{
+		ColumnRef *cref = (ColumnRef *) lfirst(lc);
+		const char *name;
+
+		/* It should be one of these, but just skip if it happens not to be */
+		if (!IsA(cref, ColumnRef))
+			continue;
+
+		name = strVal((Value *) linitial(cref->fields));	/* first field only */
+
+		if (buflen > 0)
+			buf[buflen++] = '_';	/* insert _ between names */
+
+		/*
+		 * At this point we have buflen <= NAMEDATALEN.  name should be less
+		 * than NAMEDATALEN already, but use strlcpy for paranoia.
+		 */
+		strlcpy(buf + buflen, name, NAMEDATALEN);
+		buflen += strlen(buf + buflen);
+		if (buflen >= NAMEDATALEN)
+			break;	/* long enough already; remaining names are dropped */
+	}
+	return pstrdup(buf);	/* buf is a local array, so hand back a copy */
+}