1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-01 21:31:19 +03:00

Change unknown-type literals to type text in SELECT and RETURNING lists.

Previously, we left such literals alone if the query or subquery had
no properties forcing a type decision to be made (such as an ORDER BY or
DISTINCT clause using that output column).  This meant that "unknown" could
be an exposed output column type, which has never been a great idea because
it could result in strange failures later on.  For example, an outer query
that tried to do any operations on an unknown-type subquery output would
generally fail with some weird error like "failed to find conversion
function from unknown to text" or "could not determine which collation to
use for string comparison".  Also, if the case occurred in a CREATE VIEW's
query then the view would have an unknown-type column, causing similar
failures in queries trying to use the view.

To fix, at the tail end of parse analysis of a query, forcibly convert any
remaining "unknown" literals in its SELECT or RETURNING list to type text.
However, provide a switch to suppress that, and use it in the cases of
SELECT inside a set operation or INSERT command.  In those cases we already
had type resolution rules that make use of context information from outside
the subquery proper, and we don't want to change that behavior.

Also, change creation of an unknown-type column in a relation from a
warning to a hard error.  The error should be unreachable now in CREATE
VIEW or CREATE MATVIEW, but it's still possible to explicitly say "unknown"
in CREATE TABLE or CREATE (composite) TYPE.  We want to forbid that because
it's nothing but a foot-gun.

This change creates a pg_upgrade failure case: a matview that contains an
unknown-type column can't be pg_upgraded, because reparsing the matview's
defining query will now decide that the column is of type text, which
doesn't match the cstring-like storage that the old materialized column
would actually have.  Add a checking pass to detect that.  While at it,
we can detect tables or composite types that would fail, essentially
for free.  Those would fail safely anyway later on, but we might as
well fail earlier.

This patch is by me, but it owes something to previous investigations
by Rahila Syed.  Also thanks to Ashutosh Bapat and Michael Paquier for
review.

Discussion: https://postgr.es/m/CAH2L28uwwbL9HUM-WR=hromW1Cvamkn7O-g8fPY2m=_7muJ0oA@mail.gmail.com
This commit is contained in:
Tom Lane
2017-01-25 09:17:18 -05:00
parent 123f03ba2c
commit 1e7c4bb004
26 changed files with 386 additions and 36 deletions

View File

@@ -156,13 +156,15 @@ parse_analyze_varparams(RawStmt *parseTree, const char *sourceText,
Query *
parse_sub_analyze(Node *parseTree, ParseState *parentParseState,
CommonTableExpr *parentCTE,
bool locked_from_parent)
bool locked_from_parent,
bool resolve_unknowns)
{
ParseState *pstate = make_parsestate(parentParseState);
Query *query;
pstate->p_parent_cte = parentCTE;
pstate->p_locked_from_parent = locked_from_parent;
pstate->p_resolve_unknowns = resolve_unknowns;
query = transformStmt(pstate, parseTree);
@@ -570,10 +572,17 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
* otherwise the behavior of SELECT within INSERT might be different
* from a stand-alone SELECT. (Indeed, Postgres up through 6.5 had
* bugs of just that nature...)
*
* The sole exception is that we prevent resolving unknown-type
* outputs as TEXT. This does not change the semantics since if the
* column type matters semantically, it would have been resolved to
* something else anyway. Doing this lets us resolve such outputs as
* the target column's type, which we handle below.
*/
sub_pstate->p_rtable = sub_rtable;
sub_pstate->p_joinexprs = NIL; /* sub_rtable has no joins */
sub_pstate->p_namespace = sub_namespace;
sub_pstate->p_resolve_unknowns = false;
selectQuery = transformStmt(sub_pstate, stmt->selectStmt);
@@ -1269,6 +1278,10 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
pstate->p_windowdefs,
&qry->targetList);
/* resolve any still-unresolved output columns as being type text */
if (pstate->p_resolve_unknowns)
resolveTargetListUnknowns(pstate, qry->targetList);
qry->rtable = pstate->p_rtable;
qry->jointree = makeFromExpr(pstate->p_joinlist, qual);
@@ -1843,11 +1856,19 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
/*
* Transform SelectStmt into a Query.
*
* This works the same as SELECT transformation normally would, except
* that we prevent resolving unknown-type outputs as TEXT. This does
* not change the subquery's semantics since if the column type
* matters semantically, it would have been resolved to something else
* anyway. Doing this lets us resolve such outputs using
* select_common_type(), below.
*
* Note: previously transformed sub-queries don't affect the parsing
* of this sub-query, because they are not in the toplevel pstate's
* namespace list.
*/
selectQuery = parse_sub_analyze((Node *) stmt, pstate, NULL, false);
selectQuery = parse_sub_analyze((Node *) stmt, pstate,
NULL, false, false);
/*
* Check for bogus references to Vars on the current query level (but
@@ -2350,6 +2371,10 @@ transformReturningList(ParseState *pstate, List *returningList)
/* mark column origins */
markTargetListOrigins(pstate, rlist);
/* resolve any still-unresolved output columns as being type text */
if (pstate->p_resolve_unknowns)
resolveTargetListUnknowns(pstate, rlist);
/* restore state */
pstate->p_next_resno = save_next_resno;

View File

@@ -471,7 +471,8 @@ transformRangeSubselect(ParseState *pstate, RangeSubselect *r)
* Analyze and transform the subquery.
*/
query = parse_sub_analyze(r->subquery, pstate, NULL,
isLockedRefname(pstate, r->alias->aliasname));
isLockedRefname(pstate, r->alias->aliasname),
true);
/* Restore state */
pstate->p_lateral_active = false;

View File

@@ -241,7 +241,7 @@ analyzeCTE(ParseState *pstate, CommonTableExpr *cte)
/* Analysis not done already */
Assert(!IsA(cte->ctequery, Query));
query = parse_sub_analyze(cte->ctequery, pstate, cte, false);
query = parse_sub_analyze(cte->ctequery, pstate, cte, false, true);
cte->ctequery = (Node *) query;
/*
@@ -393,11 +393,10 @@ analyzeCTETargetList(ParseState *pstate, CommonTableExpr *cte, List *tlist)
/*
* If the CTE is recursive, force the exposed column type of any
* "unknown" column to "text". This corresponds to the fact that
* SELECT 'foo' UNION SELECT 'bar' will ultimately produce text. We
* might see "unknown" as a result of an untyped literal in the
* non-recursive term's select list, and if we don't convert to text
* then we'll have a mismatch against the UNION result.
* "unknown" column to "text". We must deal with this here because
* we're called on the non-recursive term before there's been any
* attempt to force unknown output columns to some other type. We
* have to resolve unknowns before looking at the recursive term.
*
* The column might contain 'foo' COLLATE "bar", so don't override
* collation if it's already set.

View File

@@ -1846,7 +1846,7 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
/*
* OK, let's transform the sub-SELECT.
*/
qtree = parse_sub_analyze(sublink->subselect, pstate, NULL, false);
qtree = parse_sub_analyze(sublink->subselect, pstate, NULL, false, true);
/*
* Check that we got a SELECT. Anything else should be impossible given

View File

@@ -51,6 +51,7 @@ make_parsestate(ParseState *parentParseState)
/* Fill in fields that don't start at null/false/zero */
pstate->p_next_resno = 1;
pstate->p_resolve_unknowns = true;
if (parentParseState)
{

View File

@@ -288,13 +288,42 @@ transformExpressionList(ParseState *pstate, List *exprlist,
}
/*
* resolveTargetListUnknowns()
* Convert any unknown-type targetlist entries to type TEXT.
*
* We do this after we've exhausted all other ways of identifying the output
* column types of a query.
*/
void
resolveTargetListUnknowns(ParseState *pstate, List *targetlist)
{
ListCell *l;
foreach(l, targetlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
Oid restype = exprType((Node *) tle->expr);
if (restype == UNKNOWNOID)
{
tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
restype, TEXTOID, -1,
COERCION_IMPLICIT,
COERCE_IMPLICIT_CAST,
-1);
}
}
}
/*
* markTargetListOrigins()
* Mark targetlist columns that are simple Vars with the source
* table's OID and column number.
*
* Currently, this is done only for SELECT targetlists, since we only
* need the info if we are going to send it to the frontend.
* Currently, this is done only for SELECT targetlists and RETURNING lists,
* since we only need the info if we are going to send it to the frontend.
*/
void
markTargetListOrigins(ParseState *pstate, List *targetlist)