mirror of
https://github.com/postgres/postgres.git
synced 2025-12-22 17:42:17 +03:00
Convert json_in and jsonb_in to report errors softly.
This requires a bit of further infrastructure-extension to allow trapping errors reported by numeric_in and pg_unicode_to_server, but otherwise it's pretty straightforward.

In the case of jsonb_in, we are only capturing errors reported during the initial "parse" phase. The value-construction phase (JsonbValueToJsonb) can also throw errors if assorted implementation limits are exceeded. We should improve that, but it seems like a separable project.

Andrew Dunstan and Tom Lane

Discussion: https://postgr.es/m/3bac9841-fe07-713d-fa42-606c225567d6@dunslane.net
This commit is contained in:
@@ -81,9 +81,10 @@ json_in(PG_FUNCTION_ARGS)
|
||||
|
||||
/* validate it */
|
||||
lex = makeJsonLexContext(result, false);
|
||||
pg_parse_json_or_ereport(lex, &nullSemAction);
|
||||
if (!pg_parse_json_or_errsave(lex, &nullSemAction, fcinfo->context))
|
||||
PG_RETURN_NULL();
|
||||
|
||||
/* Internal representation is the same as text, for now */
|
||||
/* Internal representation is the same as text */
|
||||
PG_RETURN_TEXT_P(result);
|
||||
}
|
||||
|
||||
@@ -1337,7 +1338,7 @@ json_typeof(PG_FUNCTION_ARGS)
|
||||
/* Lex exactly one token from the input and check its type. */
|
||||
result = json_lex(lex);
|
||||
if (result != JSON_SUCCESS)
|
||||
json_ereport_error(result, lex);
|
||||
json_errsave_error(result, lex, NULL);
|
||||
tok = lex->token_type;
|
||||
switch (tok)
|
||||
{
|
||||
|
||||
@@ -33,6 +33,7 @@ typedef struct JsonbInState
|
||||
{
|
||||
JsonbParseState *parseState;
|
||||
JsonbValue *res;
|
||||
Node *escontext;
|
||||
} JsonbInState;
|
||||
|
||||
/* unlike with json categories, we need to treat json and jsonb differently */
|
||||
@@ -61,8 +62,8 @@ typedef struct JsonbAggState
|
||||
Oid val_output_func;
|
||||
} JsonbAggState;
|
||||
|
||||
static inline Datum jsonb_from_cstring(char *json, int len);
|
||||
static size_t checkStringLen(size_t len);
|
||||
static inline Datum jsonb_from_cstring(char *json, int len, Node *escontext);
|
||||
static bool checkStringLen(size_t len, Node *escontext);
|
||||
static JsonParseErrorType jsonb_in_object_start(void *pstate);
|
||||
static JsonParseErrorType jsonb_in_object_end(void *pstate);
|
||||
static JsonParseErrorType jsonb_in_array_start(void *pstate);
|
||||
@@ -98,7 +99,7 @@ jsonb_in(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char *json = PG_GETARG_CSTRING(0);
|
||||
|
||||
return jsonb_from_cstring(json, strlen(json));
|
||||
return jsonb_from_cstring(json, strlen(json), fcinfo->context);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -122,7 +123,7 @@ jsonb_recv(PG_FUNCTION_ARGS)
|
||||
else
|
||||
elog(ERROR, "unsupported jsonb version number %d", version);
|
||||
|
||||
return jsonb_from_cstring(str, nbytes);
|
||||
return jsonb_from_cstring(str, nbytes, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -251,9 +252,12 @@ jsonb_typeof(PG_FUNCTION_ARGS)
|
||||
* Turns json string into a jsonb Datum.
|
||||
*
|
||||
* Uses the json parser (with hooks) to construct a jsonb.
|
||||
*
|
||||
* If escontext points to an ErrorSaveContext, errors are reported there
|
||||
* instead of being thrown.
|
||||
*/
|
||||
static inline Datum
|
||||
jsonb_from_cstring(char *json, int len)
|
||||
jsonb_from_cstring(char *json, int len, Node *escontext)
|
||||
{
|
||||
JsonLexContext *lex;
|
||||
JsonbInState state;
|
||||
@@ -263,6 +267,7 @@ jsonb_from_cstring(char *json, int len)
|
||||
memset(&sem, 0, sizeof(sem));
|
||||
lex = makeJsonLexContextCstringLen(json, len, GetDatabaseEncoding(), true);
|
||||
|
||||
state.escontext = escontext;
|
||||
sem.semstate = (void *) &state;
|
||||
|
||||
sem.object_start = jsonb_in_object_start;
|
||||
@@ -272,23 +277,24 @@ jsonb_from_cstring(char *json, int len)
|
||||
sem.scalar = jsonb_in_scalar;
|
||||
sem.object_field_start = jsonb_in_object_field_start;
|
||||
|
||||
pg_parse_json_or_ereport(lex, &sem);
|
||||
if (!pg_parse_json_or_errsave(lex, &sem, escontext))
|
||||
return (Datum) 0;
|
||||
|
||||
/* after parsing, state.res holds the composed jsonb structure */
|
||||
PG_RETURN_POINTER(JsonbValueToJsonb(state.res));
|
||||
}
|
||||
|
||||
static size_t
|
||||
checkStringLen(size_t len)
|
||||
static bool
|
||||
checkStringLen(size_t len, Node *escontext)
|
||||
{
|
||||
if (len > JENTRY_OFFLENMASK)
|
||||
ereport(ERROR,
|
||||
ereturn(escontext, false,
|
||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||
errmsg("string too long to represent as jsonb string"),
|
||||
errdetail("Due to an implementation restriction, jsonb strings cannot exceed %d bytes.",
|
||||
JENTRY_OFFLENMASK)));
|
||||
|
||||
return len;
|
||||
return true;
|
||||
}
|
||||
|
||||
static JsonParseErrorType
|
||||
@@ -339,7 +345,9 @@ jsonb_in_object_field_start(void *pstate, char *fname, bool isnull)
|
||||
|
||||
Assert(fname != NULL);
|
||||
v.type = jbvString;
|
||||
v.val.string.len = checkStringLen(strlen(fname));
|
||||
v.val.string.len = strlen(fname);
|
||||
if (!checkStringLen(v.val.string.len, _state->escontext))
|
||||
return JSON_SEM_ACTION_FAILED;
|
||||
v.val.string.val = fname;
|
||||
|
||||
_state->res = pushJsonbValue(&_state->parseState, WJB_KEY, &v);
|
||||
@@ -390,7 +398,9 @@ jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype)
|
||||
case JSON_TOKEN_STRING:
|
||||
Assert(token != NULL);
|
||||
v.type = jbvString;
|
||||
v.val.string.len = checkStringLen(strlen(token));
|
||||
v.val.string.len = strlen(token);
|
||||
if (!checkStringLen(v.val.string.len, _state->escontext))
|
||||
return JSON_SEM_ACTION_FAILED;
|
||||
v.val.string.val = token;
|
||||
break;
|
||||
case JSON_TOKEN_NUMBER:
|
||||
@@ -401,10 +411,11 @@ jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype)
|
||||
*/
|
||||
Assert(token != NULL);
|
||||
v.type = jbvNumeric;
|
||||
numd = DirectFunctionCall3(numeric_in,
|
||||
CStringGetDatum(token),
|
||||
ObjectIdGetDatum(InvalidOid),
|
||||
Int32GetDatum(-1));
|
||||
if (!DirectInputFunctionCallSafe(numeric_in, token,
|
||||
InvalidOid, -1,
|
||||
_state->escontext,
|
||||
&numd))
|
||||
return JSON_SEM_ACTION_FAILED;
|
||||
v.val.numeric = DatumGetNumeric(numd);
|
||||
break;
|
||||
case JSON_TOKEN_TRUE:
|
||||
@@ -738,6 +749,9 @@ jsonb_categorize_type(Oid typoid,
|
||||
*
|
||||
* If key_scalar is true, the value is stored as a key, so insist
|
||||
* it's of an acceptable type, and force it to be a jbvString.
|
||||
*
|
||||
* Note: currently, we assume that result->escontext is NULL and errors
|
||||
* will be thrown.
|
||||
*/
|
||||
static void
|
||||
datum_to_jsonb(Datum val, bool is_null, JsonbInState *result,
|
||||
@@ -910,7 +924,8 @@ datum_to_jsonb(Datum val, bool is_null, JsonbInState *result,
|
||||
default:
|
||||
outputstr = OidOutputFunctionCall(outfuncoid, val);
|
||||
jb.type = jbvString;
|
||||
jb.val.string.len = checkStringLen(strlen(outputstr));
|
||||
jb.val.string.len = strlen(outputstr);
|
||||
(void) checkStringLen(jb.val.string.len, NULL);
|
||||
jb.val.string.val = outputstr;
|
||||
break;
|
||||
}
|
||||
@@ -1648,6 +1663,7 @@ jsonb_agg_finalfn(PG_FUNCTION_ARGS)
|
||||
* shallow clone is sufficient as we aren't going to change any of the
|
||||
* values, just add the final array end marker.
|
||||
*/
|
||||
memset(&result, 0, sizeof(JsonbInState));
|
||||
|
||||
result.parseState = clone_parse_state(arg->res->parseState);
|
||||
|
||||
@@ -1880,6 +1896,7 @@ jsonb_object_agg_finalfn(PG_FUNCTION_ARGS)
|
||||
* going to change any of the values, just add the final object end
|
||||
* marker.
|
||||
*/
|
||||
memset(&result, 0, sizeof(JsonbInState));
|
||||
|
||||
result.parseState = clone_parse_state(arg->res->parseState);
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "lib/stringinfo.h"
|
||||
#include "mb/pg_wchar.h"
|
||||
#include "miscadmin.h"
|
||||
#include "nodes/miscnodes.h"
|
||||
#include "utils/array.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/fmgroids.h"
|
||||
@@ -490,21 +491,31 @@ static JsonParseErrorType transform_string_values_object_field_start(void *state
|
||||
static JsonParseErrorType transform_string_values_array_element_start(void *state, bool isnull);
|
||||
static JsonParseErrorType transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
|
||||
|
||||
|
||||
/*
|
||||
* pg_parse_json_or_ereport
|
||||
* pg_parse_json_or_errsave
|
||||
*
|
||||
* This function is like pg_parse_json, except that it does not return a
|
||||
* JsonParseErrorType. Instead, in case of any failure, this function will
|
||||
* save error data into *escontext if that's an ErrorSaveContext, otherwise
|
||||
* ereport(ERROR).
|
||||
*
|
||||
* Returns a boolean indicating success or failure (failure will only be
|
||||
* returned when escontext is an ErrorSaveContext).
|
||||
*/
|
||||
void
|
||||
pg_parse_json_or_ereport(JsonLexContext *lex, JsonSemAction *sem)
|
||||
bool
|
||||
pg_parse_json_or_errsave(JsonLexContext *lex, JsonSemAction *sem,
|
||||
Node *escontext)
|
||||
{
|
||||
JsonParseErrorType result;
|
||||
|
||||
result = pg_parse_json(lex, sem);
|
||||
if (result != JSON_SUCCESS)
|
||||
json_ereport_error(result, lex);
|
||||
{
|
||||
json_errsave_error(result, lex, escontext);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -608,17 +619,25 @@ jsonb_object_keys(PG_FUNCTION_ARGS)
|
||||
* Report a JSON error.
|
||||
*/
|
||||
void
|
||||
json_ereport_error(JsonParseErrorType error, JsonLexContext *lex)
|
||||
json_errsave_error(JsonParseErrorType error, JsonLexContext *lex,
|
||||
Node *escontext)
|
||||
{
|
||||
if (error == JSON_UNICODE_HIGH_ESCAPE ||
|
||||
error == JSON_UNICODE_UNTRANSLATABLE ||
|
||||
error == JSON_UNICODE_CODE_POINT_ZERO)
|
||||
ereport(ERROR,
|
||||
errsave(escontext,
|
||||
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
|
||||
errmsg("unsupported Unicode escape sequence"),
|
||||
errdetail_internal("%s", json_errdetail(error, lex)),
|
||||
report_json_context(lex)));
|
||||
else if (error == JSON_SEM_ACTION_FAILED)
|
||||
{
|
||||
/* semantic action function had better have reported something */
|
||||
if (!SOFT_ERROR_OCCURRED(escontext))
|
||||
elog(ERROR, "JSON semantic action function did not provide error information");
|
||||
}
|
||||
else
|
||||
ereport(ERROR,
|
||||
errsave(escontext,
|
||||
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||
errmsg("invalid input syntax for type %s", "json"),
|
||||
errdetail_internal("%s", json_errdetail(error, lex)),
|
||||
@@ -1274,7 +1293,7 @@ get_array_start(void *state)
|
||||
|
||||
error = json_count_array_elements(_state->lex, &nelements);
|
||||
if (error != JSON_SUCCESS)
|
||||
json_ereport_error(error, _state->lex);
|
||||
json_errsave_error(error, _state->lex, NULL);
|
||||
|
||||
if (-_state->path_indexes[lex_level] <= nelements)
|
||||
_state->path_indexes[lex_level] += nelements;
|
||||
|
||||
@@ -1614,6 +1614,51 @@ InputFunctionCallSafe(FmgrInfo *flinfo, char *str,
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Call a directly-named datatype input function, with non-exception
|
||||
* handling of "soft" errors.
|
||||
*
|
||||
* This is like InputFunctionCallSafe, except that it is given a direct
|
||||
* pointer to the C function to call. We assume that that function is
|
||||
* strict. Also, the function cannot be one that needs to
|
||||
* look at FmgrInfo, since there won't be any.
|
||||
*/
|
||||
bool
|
||||
DirectInputFunctionCallSafe(PGFunction func, char *str,
|
||||
Oid typioparam, int32 typmod,
|
||||
fmNodePtr escontext,
|
||||
Datum *result)
|
||||
{
|
||||
LOCAL_FCINFO(fcinfo, 3);
|
||||
|
||||
if (str == NULL)
|
||||
{
|
||||
*result = (Datum) 0; /* just return null result */
|
||||
return true;
|
||||
}
|
||||
|
||||
InitFunctionCallInfoData(*fcinfo, NULL, 3, InvalidOid, escontext, NULL);
|
||||
|
||||
fcinfo->args[0].value = CStringGetDatum(str);
|
||||
fcinfo->args[0].isnull = false;
|
||||
fcinfo->args[1].value = ObjectIdGetDatum(typioparam);
|
||||
fcinfo->args[1].isnull = false;
|
||||
fcinfo->args[2].value = Int32GetDatum(typmod);
|
||||
fcinfo->args[2].isnull = false;
|
||||
|
||||
*result = (*func) (fcinfo);
|
||||
|
||||
/* Result value is garbage, and could be null, if an error was reported */
|
||||
if (SOFT_ERROR_OCCURRED(escontext))
|
||||
return false;
|
||||
|
||||
/* Otherwise, shouldn't get null result */
|
||||
if (fcinfo->isnull)
|
||||
elog(ERROR, "input function %p returned NULL", (void *) func);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Call a previously-looked-up datatype output function.
|
||||
*
|
||||
|
||||
@@ -916,6 +916,63 @@ pg_unicode_to_server(pg_wchar c, unsigned char *s)
|
||||
BoolGetDatum(false));
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a single Unicode code point into a string in the server encoding.
|
||||
*
|
||||
* Same as pg_unicode_to_server(), except that we don't throw errors,
|
||||
* but simply return false on conversion failure.
|
||||
*/
|
||||
bool
|
||||
pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s)
|
||||
{
|
||||
unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
|
||||
int c_as_utf8_len;
|
||||
int converted_len;
|
||||
int server_encoding;
|
||||
|
||||
/* Fail if invalid Unicode code point */
|
||||
if (!is_valid_unicode_codepoint(c))
|
||||
return false;
|
||||
|
||||
/* Otherwise, if it's in ASCII range, conversion is trivial */
|
||||
if (c <= 0x7F)
|
||||
{
|
||||
s[0] = (unsigned char) c;
|
||||
s[1] = '\0';
|
||||
return true;
|
||||
}
|
||||
|
||||
/* If the server encoding is UTF-8, we just need to reformat the code */
|
||||
server_encoding = GetDatabaseEncoding();
|
||||
if (server_encoding == PG_UTF8)
|
||||
{
|
||||
unicode_to_utf8(c, s);
|
||||
s[pg_utf_mblen(s)] = '\0';
|
||||
return true;
|
||||
}
|
||||
|
||||
/* For all other cases, we must have a conversion function available */
|
||||
if (Utf8ToServerConvProc == NULL)
|
||||
return false;
|
||||
|
||||
/* Construct UTF-8 source string */
|
||||
unicode_to_utf8(c, c_as_utf8);
|
||||
c_as_utf8_len = pg_utf_mblen(c_as_utf8);
|
||||
c_as_utf8[c_as_utf8_len] = '\0';
|
||||
|
||||
/* Convert, but without throwing error if we can't */
|
||||
converted_len = DatumGetInt32(FunctionCall6(Utf8ToServerConvProc,
|
||||
Int32GetDatum(PG_UTF8),
|
||||
Int32GetDatum(server_encoding),
|
||||
CStringGetDatum((char *) c_as_utf8),
|
||||
CStringGetDatum((char *) s),
|
||||
Int32GetDatum(c_as_utf8_len),
|
||||
BoolGetDatum(true)));
|
||||
|
||||
/* Conversion was successful iff it consumed the whole input */
|
||||
return (converted_len == c_as_utf8_len);
|
||||
}
|
||||
|
||||
|
||||
/* convert a multibyte string to a wchar */
|
||||
int
|
||||
|
||||
Reference in New Issue
Block a user