mirror of
https://github.com/postgres/postgres.git
synced 2025-05-02 11:44:50 +03:00
This allows underscores to be used in integer and numeric literals, and their corresponding type input functions, for visual grouping. For example: 1_500_000_000 3.14159_26535_89793 0xffff_ffff 0b_1001_0001 A single underscore is allowed between any 2 digits, or immediately after the base prefix indicator of non-decimal integers, per SQL:202x draft. Peter Eisentraut and Dean Rasheed Discussion: https://postgr.es/m/84aae844-dc55-a4be-86d9-4f0fa405cc97%40enterprisedb.com
486 lines
15 KiB
C
486 lines
15 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* parse_node.c
|
|
* various routines that make nodes for querytrees
|
|
*
|
|
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/parser/parse_node.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "access/htup_details.h"
|
|
#include "access/table.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "mb/pg_wchar.h"
|
|
#include "nodes/makefuncs.h"
|
|
#include "nodes/miscnodes.h"
|
|
#include "nodes/nodeFuncs.h"
|
|
#include "nodes/subscripting.h"
|
|
#include "parser/parse_coerce.h"
|
|
#include "parser/parse_expr.h"
|
|
#include "parser/parse_relation.h"
|
|
#include "parser/parsetree.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/lsyscache.h"
|
|
#include "utils/syscache.h"
|
|
#include "utils/varbit.h"
|
|
|
|
static void pcb_error_callback(void *arg);
|
|
|
|
|
|
/*
|
|
* make_parsestate
|
|
* Allocate and initialize a new ParseState.
|
|
*
|
|
* Caller should eventually release the ParseState via free_parsestate().
|
|
*/
|
|
ParseState *
|
|
make_parsestate(ParseState *parentParseState)
|
|
{
|
|
ParseState *pstate;
|
|
|
|
pstate = palloc0(sizeof(ParseState));
|
|
|
|
pstate->parentParseState = parentParseState;
|
|
|
|
/* Fill in fields that don't start at null/false/zero */
|
|
pstate->p_next_resno = 1;
|
|
pstate->p_resolve_unknowns = true;
|
|
|
|
if (parentParseState)
|
|
{
|
|
pstate->p_sourcetext = parentParseState->p_sourcetext;
|
|
/* all hooks are copied from parent */
|
|
pstate->p_pre_columnref_hook = parentParseState->p_pre_columnref_hook;
|
|
pstate->p_post_columnref_hook = parentParseState->p_post_columnref_hook;
|
|
pstate->p_paramref_hook = parentParseState->p_paramref_hook;
|
|
pstate->p_coerce_param_hook = parentParseState->p_coerce_param_hook;
|
|
pstate->p_ref_hook_state = parentParseState->p_ref_hook_state;
|
|
/* query environment stays in context for the whole parse analysis */
|
|
pstate->p_queryEnv = parentParseState->p_queryEnv;
|
|
}
|
|
|
|
return pstate;
|
|
}
|
|
|
|
/*
|
|
* free_parsestate
|
|
* Release a ParseState and any subsidiary resources.
|
|
*/
|
|
void
|
|
free_parsestate(ParseState *pstate)
|
|
{
|
|
/*
|
|
* Check that we did not produce too many resnos; at the very least we
|
|
* cannot allow more than 2^16, since that would exceed the range of a
|
|
* AttrNumber. It seems safest to use MaxTupleAttributeNumber.
|
|
*/
|
|
if (pstate->p_next_resno - 1 > MaxTupleAttributeNumber)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_TOO_MANY_COLUMNS),
|
|
errmsg("target lists can have at most %d entries",
|
|
MaxTupleAttributeNumber)));
|
|
|
|
if (pstate->p_target_relation != NULL)
|
|
table_close(pstate->p_target_relation, NoLock);
|
|
|
|
pfree(pstate);
|
|
}
|
|
|
|
|
|
/*
|
|
* parser_errposition
|
|
* Report a parse-analysis-time cursor position, if possible.
|
|
*
|
|
* This is expected to be used within an ereport() call. The return value
|
|
* is a dummy (always 0, in fact).
|
|
*
|
|
* The locations stored in raw parsetrees are byte offsets into the source
|
|
* string. We have to convert them to 1-based character indexes for reporting
|
|
* to clients. (We do things this way to avoid unnecessary overhead in the
|
|
* normal non-error case: computing character indexes would be much more
|
|
* expensive than storing token offsets.)
|
|
*/
|
|
int
|
|
parser_errposition(ParseState *pstate, int location)
|
|
{
|
|
int pos;
|
|
|
|
/* No-op if location was not provided */
|
|
if (location < 0)
|
|
return 0;
|
|
/* Can't do anything if source text is not available */
|
|
if (pstate == NULL || pstate->p_sourcetext == NULL)
|
|
return 0;
|
|
/* Convert offset to character number */
|
|
pos = pg_mbstrlen_with_len(pstate->p_sourcetext, location) + 1;
|
|
/* And pass it to the ereport mechanism */
|
|
return errposition(pos);
|
|
}
|
|
|
|
|
|
/*
|
|
* setup_parser_errposition_callback
|
|
* Arrange for non-parser errors to report an error position
|
|
*
|
|
* Sometimes the parser calls functions that aren't part of the parser
|
|
* subsystem and can't reasonably be passed a ParseState; yet we would
|
|
* like any errors thrown in those functions to be tagged with a parse
|
|
* error location. Use this function to set up an error context stack
|
|
* entry that will accomplish that. Usage pattern:
|
|
*
|
|
* declare a local variable "ParseCallbackState pcbstate"
|
|
* ...
|
|
* setup_parser_errposition_callback(&pcbstate, pstate, location);
|
|
* call function that might throw error;
|
|
* cancel_parser_errposition_callback(&pcbstate);
|
|
*/
|
|
void
|
|
setup_parser_errposition_callback(ParseCallbackState *pcbstate,
|
|
ParseState *pstate, int location)
|
|
{
|
|
/* Setup error traceback support for ereport() */
|
|
pcbstate->pstate = pstate;
|
|
pcbstate->location = location;
|
|
pcbstate->errcallback.callback = pcb_error_callback;
|
|
pcbstate->errcallback.arg = (void *) pcbstate;
|
|
pcbstate->errcallback.previous = error_context_stack;
|
|
error_context_stack = &pcbstate->errcallback;
|
|
}
|
|
|
|
/*
|
|
* Cancel a previously-set-up errposition callback.
|
|
*/
|
|
void
|
|
cancel_parser_errposition_callback(ParseCallbackState *pcbstate)
|
|
{
|
|
/* Pop the error context stack */
|
|
error_context_stack = pcbstate->errcallback.previous;
|
|
}
|
|
|
|
/*
|
|
* Error context callback for inserting parser error location.
|
|
*
|
|
* Note that this will be called for *any* error occurring while the
|
|
* callback is installed. We avoid inserting an irrelevant error location
|
|
* if the error is a query cancel --- are there any other important cases?
|
|
*/
|
|
static void
|
|
pcb_error_callback(void *arg)
|
|
{
|
|
ParseCallbackState *pcbstate = (ParseCallbackState *) arg;
|
|
|
|
if (geterrcode() != ERRCODE_QUERY_CANCELED)
|
|
(void) parser_errposition(pcbstate->pstate, pcbstate->location);
|
|
}
|
|
|
|
|
|
/*
|
|
* transformContainerType()
|
|
* Identify the actual container type for a subscripting operation.
|
|
*
|
|
* containerType/containerTypmod are modified if necessary to identify
|
|
* the actual container type and typmod. This mainly involves smashing
|
|
* any domain to its base type, but there are some special considerations.
|
|
* Note that caller still needs to check if the result type is a container.
|
|
*/
|
|
void
|
|
transformContainerType(Oid *containerType, int32 *containerTypmod)
|
|
{
|
|
/*
|
|
* If the input is a domain, smash to base type, and extract the actual
|
|
* typmod to be applied to the base type. Subscripting a domain is an
|
|
* operation that necessarily works on the base container type, not the
|
|
* domain itself. (Note that we provide no method whereby the creator of a
|
|
* domain over a container type could hide its ability to be subscripted.)
|
|
*/
|
|
*containerType = getBaseTypeAndTypmod(*containerType, containerTypmod);
|
|
|
|
/*
|
|
* We treat int2vector and oidvector as though they were domains over
|
|
* int2[] and oid[]. This is needed because array slicing could create an
|
|
* array that doesn't satisfy the dimensionality constraints of the
|
|
* xxxvector type; so we want the result of a slice operation to be
|
|
* considered to be of the more general type.
|
|
*/
|
|
if (*containerType == INT2VECTOROID)
|
|
*containerType = INT2ARRAYOID;
|
|
else if (*containerType == OIDVECTOROID)
|
|
*containerType = OIDARRAYOID;
|
|
}
|
|
|
|
/*
|
|
* transformContainerSubscripts()
|
|
* Transform container (array, etc) subscripting. This is used for both
|
|
* container fetch and container assignment.
|
|
*
|
|
* In a container fetch, we are given a source container value and we produce
|
|
* an expression that represents the result of extracting a single container
|
|
* element or a container slice.
|
|
*
|
|
* Container assignments are treated basically the same as container fetches
|
|
* here. The caller will modify the result node to insert the source value
|
|
* that is to be assigned to the element or slice that a fetch would have
|
|
* retrieved. The execution result will be a new container value with
|
|
* the source value inserted into the right part of the container.
|
|
*
|
|
* For both cases, if the source is of a domain-over-container type, the
|
|
* result is the same as if it had been of the container type; essentially,
|
|
* we must fold a domain to its base type before applying subscripting.
|
|
* (Note that int2vector and oidvector are treated as domains here.)
|
|
*
|
|
* pstate Parse state
|
|
* containerBase Already-transformed expression for the container as a whole
|
|
* containerType OID of container's datatype (should match type of
|
|
* containerBase, or be the base type of containerBase's
|
|
* domain type)
|
|
* containerTypMod typmod for the container
|
|
* indirection Untransformed list of subscripts (must not be NIL)
|
|
* isAssignment True if this will become a container assignment.
|
|
*/
|
|
SubscriptingRef *
|
|
transformContainerSubscripts(ParseState *pstate,
|
|
Node *containerBase,
|
|
Oid containerType,
|
|
int32 containerTypMod,
|
|
List *indirection,
|
|
bool isAssignment)
|
|
{
|
|
SubscriptingRef *sbsref;
|
|
const SubscriptRoutines *sbsroutines;
|
|
Oid elementType;
|
|
bool isSlice = false;
|
|
ListCell *idx;
|
|
|
|
/*
|
|
* Determine the actual container type, smashing any domain. In the
|
|
* assignment case the caller already did this, since it also needs to
|
|
* know the actual container type.
|
|
*/
|
|
if (!isAssignment)
|
|
transformContainerType(&containerType, &containerTypMod);
|
|
|
|
/*
|
|
* Verify that the container type is subscriptable, and get its support
|
|
* functions and typelem.
|
|
*/
|
|
sbsroutines = getSubscriptingRoutines(containerType, &elementType);
|
|
if (!sbsroutines)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
|
errmsg("cannot subscript type %s because it does not support subscripting",
|
|
format_type_be(containerType)),
|
|
parser_errposition(pstate, exprLocation(containerBase))));
|
|
|
|
/*
|
|
* Detect whether any of the indirection items are slice specifiers.
|
|
*
|
|
* A list containing only simple subscripts refers to a single container
|
|
* element. If any of the items are slice specifiers (lower:upper), then
|
|
* the subscript expression means a container slice operation.
|
|
*/
|
|
foreach(idx, indirection)
|
|
{
|
|
A_Indices *ai = lfirst_node(A_Indices, idx);
|
|
|
|
if (ai->is_slice)
|
|
{
|
|
isSlice = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Ready to build the SubscriptingRef node.
|
|
*/
|
|
sbsref = makeNode(SubscriptingRef);
|
|
|
|
sbsref->refcontainertype = containerType;
|
|
sbsref->refelemtype = elementType;
|
|
/* refrestype is to be set by container-specific logic */
|
|
sbsref->reftypmod = containerTypMod;
|
|
/* refcollid will be set by parse_collate.c */
|
|
/* refupperindexpr, reflowerindexpr are to be set by container logic */
|
|
sbsref->refexpr = (Expr *) containerBase;
|
|
sbsref->refassgnexpr = NULL; /* caller will fill if it's an assignment */
|
|
|
|
/*
|
|
* Call the container-type-specific logic to transform the subscripts and
|
|
* determine the subscripting result type.
|
|
*/
|
|
sbsroutines->transform(sbsref, indirection, pstate,
|
|
isSlice, isAssignment);
|
|
|
|
/*
|
|
* Verify we got a valid type (this defends, for example, against someone
|
|
* using array_subscript_handler as typsubscript without setting typelem).
|
|
*/
|
|
if (!OidIsValid(sbsref->refrestype))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
|
errmsg("cannot subscript type %s because it does not support subscripting",
|
|
format_type_be(containerType))));
|
|
|
|
return sbsref;
|
|
}
|
|
|
|
/*
|
|
* make_const
|
|
*
|
|
* Convert an A_Const node (as returned by the grammar) to a Const node
|
|
* of the "natural" type for the constant. Note that this routine is
|
|
* only used when there is no explicit cast for the constant, so we
|
|
* have to guess what type is wanted.
|
|
*
|
|
* For string literals we produce a constant of type UNKNOWN ---- whose
|
|
* representation is the same as cstring, but it indicates to later type
|
|
* resolution that we're not sure yet what type it should be considered.
|
|
* Explicit "NULL" constants are also typed as UNKNOWN.
|
|
*
|
|
* For integers and floats we produce int4, int8, or numeric depending
|
|
* on the value of the number. XXX We should produce int2 as well,
|
|
* but additional cleanup is needed before we can do that; there are
|
|
* too many examples that fail if we try.
|
|
*/
|
|
Const *
|
|
make_const(ParseState *pstate, A_Const *aconst)
|
|
{
|
|
Const *con;
|
|
Datum val;
|
|
Oid typeid;
|
|
int typelen;
|
|
bool typebyval;
|
|
ParseCallbackState pcbstate;
|
|
|
|
if (aconst->isnull)
|
|
{
|
|
/* return a null const */
|
|
con = makeConst(UNKNOWNOID,
|
|
-1,
|
|
InvalidOid,
|
|
-2,
|
|
(Datum) 0,
|
|
true,
|
|
false);
|
|
con->location = aconst->location;
|
|
return con;
|
|
}
|
|
|
|
switch (nodeTag(&aconst->val))
|
|
{
|
|
case T_Integer:
|
|
val = Int32GetDatum(intVal(&aconst->val));
|
|
|
|
typeid = INT4OID;
|
|
typelen = sizeof(int32);
|
|
typebyval = true;
|
|
break;
|
|
|
|
case T_Float:
|
|
{
|
|
/* could be an oversize integer as well as a float ... */
|
|
|
|
ErrorSaveContext escontext = {T_ErrorSaveContext};
|
|
int64 val64;
|
|
|
|
val64 = pg_strtoint64_safe(aconst->val.fval.fval, (Node *) &escontext);
|
|
if (!escontext.error_occurred)
|
|
{
|
|
/*
|
|
* It might actually fit in int32. Probably only INT_MIN
|
|
* can occur, but we'll code the test generally just to be
|
|
* sure.
|
|
*/
|
|
int32 val32 = (int32) val64;
|
|
|
|
if (val64 == (int64) val32)
|
|
{
|
|
val = Int32GetDatum(val32);
|
|
|
|
typeid = INT4OID;
|
|
typelen = sizeof(int32);
|
|
typebyval = true;
|
|
}
|
|
else
|
|
{
|
|
val = Int64GetDatum(val64);
|
|
|
|
typeid = INT8OID;
|
|
typelen = sizeof(int64);
|
|
typebyval = FLOAT8PASSBYVAL; /* int8 and float8 alike */
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* arrange to report location if numeric_in() fails */
|
|
setup_parser_errposition_callback(&pcbstate, pstate, aconst->location);
|
|
val = DirectFunctionCall3(numeric_in,
|
|
CStringGetDatum(aconst->val.fval.fval),
|
|
ObjectIdGetDatum(InvalidOid),
|
|
Int32GetDatum(-1));
|
|
cancel_parser_errposition_callback(&pcbstate);
|
|
|
|
typeid = NUMERICOID;
|
|
typelen = -1; /* variable len */
|
|
typebyval = false;
|
|
}
|
|
break;
|
|
}
|
|
|
|
case T_Boolean:
|
|
val = BoolGetDatum(boolVal(&aconst->val));
|
|
|
|
typeid = BOOLOID;
|
|
typelen = 1;
|
|
typebyval = true;
|
|
break;
|
|
|
|
case T_String:
|
|
|
|
/*
|
|
* We assume here that UNKNOWN's internal representation is the
|
|
* same as CSTRING
|
|
*/
|
|
val = CStringGetDatum(strVal(&aconst->val));
|
|
|
|
typeid = UNKNOWNOID; /* will be coerced later */
|
|
typelen = -2; /* cstring-style varwidth type */
|
|
typebyval = false;
|
|
break;
|
|
|
|
case T_BitString:
|
|
/* arrange to report location if bit_in() fails */
|
|
setup_parser_errposition_callback(&pcbstate, pstate, aconst->location);
|
|
val = DirectFunctionCall3(bit_in,
|
|
CStringGetDatum(aconst->val.bsval.bsval),
|
|
ObjectIdGetDatum(InvalidOid),
|
|
Int32GetDatum(-1));
|
|
cancel_parser_errposition_callback(&pcbstate);
|
|
typeid = BITOID;
|
|
typelen = -1;
|
|
typebyval = false;
|
|
break;
|
|
|
|
default:
|
|
elog(ERROR, "unrecognized node type: %d", (int) nodeTag(&aconst->val));
|
|
return NULL; /* keep compiler quiet */
|
|
}
|
|
|
|
con = makeConst(typeid,
|
|
-1, /* typmod -1 is OK for all cases */
|
|
InvalidOid, /* all cases are uncollatable types */
|
|
typelen,
|
|
val,
|
|
false,
|
|
typebyval);
|
|
con->location = aconst->location;
|
|
|
|
return con;
|
|
}
|