1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Convert tsqueryin and tsvectorin to report errors softly.

This is slightly tedious because the adjustments cascade through
a couple of levels of subroutines, but it's not very hard.
I chose to avoid changing function signatures more than absolutely
necessary, by passing the escontext pointer in existing structs
where possible.

tsquery's nuisance NOTICEs about empty queries are suppressed in
soft-error mode, since they're not errors and we surely don't want
them to be shown to the user anyway.  Maybe that whole behavior
should be reconsidered.

Discussion: https://postgr.es/m/3824377.1672076822@sss.pgh.pa.us
This commit is contained in:
Tom Lane
2022-12-27 12:00:31 -05:00
parent eb8312a22a
commit 78212f2101
8 changed files with 196 additions and 52 deletions

View File

@ -16,6 +16,7 @@
#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "nodes/miscnodes.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
@ -58,10 +59,16 @@ typedef enum
/*
* get token from query string
*
* *operator is filled in with OP_* when return values is PT_OPR,
* but *weight could contain a distance value in case of phrase operator.
* *strval, *lenval and *weight are filled in when return value is PT_VAL
* All arguments except "state" are output arguments.
*
* If return value is PT_OPR, then *operator is filled with an OP_* code
* and *weight will contain a distance value in case of phrase operator.
*
* If return value is PT_VAL, then *lenval, *strval, *weight, and *prefix
* are filled.
*
* If PT_ERR is returned then a soft error has occurred. If state->escontext
* isn't already filled then this should be reported as a generic parse error.
*/
typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
int *lenval, char **strval,
@ -93,6 +100,9 @@ struct TSQueryParserStateData
/* state for value's parser */
TSVectorParseState valstate;
/* context object for soft errors - must match valstate's escontext */
Node *escontext;
};
/*
@ -194,7 +204,7 @@ parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
if (ptr == endptr)
return false;
else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
ereport(ERROR,
ereturn(pstate->escontext, false,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
MAXENTRYPOS)));
@ -301,10 +311,8 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
}
else if (t_iseq(state->buf, ':'))
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsquery: \"%s\"",
state->buffer)));
/* generic syntax error message is fine */
return PT_ERR;
}
else if (!t_isspace(state->buf))
{
@ -320,12 +328,17 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
state->state = WAITOPERATOR;
return PT_VAL;
}
else if (SOFT_ERROR_OCCURRED(state->escontext))
{
/* gettoken_tsvector reported a soft error */
return PT_ERR;
}
else if (state->state == WAITFIRSTOPERAND)
{
return PT_END;
}
else
ereport(ERROR,
ereturn(state->escontext, PT_ERR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("no operand in tsquery: \"%s\"",
state->buffer)));
@ -354,6 +367,11 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
*operator = OP_PHRASE;
return PT_OPR;
}
else if (SOFT_ERROR_OCCURRED(state->escontext))
{
/* parse_phrase_operator reported a soft error */
return PT_ERR;
}
else if (t_iseq(state->buf, ')'))
{
state->buf++;
@ -438,6 +456,11 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
state->state = WAITOPERATOR;
return PT_VAL;
}
else if (SOFT_ERROR_OCCURRED(state->escontext))
{
/* gettoken_tsvector reported a soft error */
return PT_ERR;
}
else if (state->state == WAITFIRSTOPERAND)
{
return PT_END;
@ -529,12 +552,12 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
QueryOperand *tmp;
if (distance >= MAXSTRPOS)
ereport(ERROR,
ereturn(state->escontext,,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("value is too big in tsquery: \"%s\"",
state->buffer)));
if (lenval >= MAXSTRLEN)
ereport(ERROR,
ereturn(state->escontext,,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("operand is too long in tsquery: \"%s\"",
state->buffer)));
@ -562,7 +585,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool
pg_crc32 valcrc;
if (lenval >= MAXSTRLEN)
ereport(ERROR,
ereturn(state->escontext,,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("word is too long in tsquery: \"%s\"",
state->buffer)));
@ -686,11 +709,17 @@ makepol(TSQueryParserState state,
return;
case PT_ERR:
default:
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsquery: \"%s\"",
state->buffer)));
/* don't overwrite a soft error saved by gettoken function */
if (!SOFT_ERROR_OCCURRED(state->escontext))
errsave(state->escontext,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsquery: \"%s\"",
state->buffer)));
return;
}
/* detect soft error in pushval or recursion */
if (SOFT_ERROR_OCCURRED(state->escontext))
return;
}
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
@ -769,6 +798,8 @@ findoprnd(QueryItem *ptr, int size, bool *needcleanup)
/*
* Parse the tsquery stored in "buf".
*
* Each value (operand) in the query is passed to pushval. pushval can
* transform the simple value to an arbitrarily complex expression using
* pushValue and pushOperator. It must push a single value with pushValue,
@ -778,12 +809,19 @@ findoprnd(QueryItem *ptr, int size, bool *needcleanup)
*
* opaque is passed on to pushval as is, pushval can use it to store its
* private state.
*
* The pushval function can record soft errors via escontext.
* Callers must check SOFT_ERROR_OCCURRED to detect that.
*
* A bitmask of flags (see ts_utils.h) and an error context object
* can be provided as well. If a soft error occurs, NULL is returned.
*/
TSQuery
parse_tsquery(char *buf,
PushFunction pushval,
Datum opaque,
int flags)
int flags,
Node *escontext)
{
struct TSQueryParserStateData state;
int i;
@ -791,6 +829,7 @@ parse_tsquery(char *buf,
int commonlen;
QueryItem *ptr;
ListCell *cell;
bool noisy;
bool needcleanup;
int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
@ -808,15 +847,19 @@ parse_tsquery(char *buf,
else
state.gettoken = gettoken_query_standard;
/* emit nuisance NOTICEs only if not doing soft errors */
noisy = !(escontext && IsA(escontext, ErrorSaveContext));
/* init state */
state.buffer = buf;
state.buf = buf;
state.count = 0;
state.state = WAITFIRSTOPERAND;
state.polstr = NIL;
state.escontext = escontext;
/* init value parser's state */
state.valstate = init_tsvector_parser(state.buffer, tsv_flags);
state.valstate = init_tsvector_parser(state.buffer, tsv_flags, escontext);
/* init list of operand */
state.sumlen = 0;
@ -829,11 +872,15 @@ parse_tsquery(char *buf,
close_tsvector_parser(state.valstate);
if (SOFT_ERROR_OCCURRED(escontext))
return NULL;
if (state.polstr == NIL)
{
ereport(NOTICE,
(errmsg("text-search query doesn't contain lexemes: \"%s\"",
state.buffer)));
if (noisy)
ereport(NOTICE,
(errmsg("text-search query doesn't contain lexemes: \"%s\"",
state.buffer)));
query = (TSQuery) palloc(HDRSIZETQ);
SET_VARSIZE(query, HDRSIZETQ);
query->size = 0;
@ -841,7 +888,7 @@ parse_tsquery(char *buf,
}
if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
ereport(ERROR,
ereturn(escontext, NULL,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("tsquery is too large")));
commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
@ -889,7 +936,7 @@ parse_tsquery(char *buf,
* If there are QI_VALSTOP nodes, delete them and simplify the tree.
*/
if (needcleanup)
query = cleanup_tsquery_stopwords(query);
query = cleanup_tsquery_stopwords(query, noisy);
return query;
}
@ -908,8 +955,13 @@ Datum
tsqueryin(PG_FUNCTION_ARGS)
{
char *in = PG_GETARG_CSTRING(0);
Node *escontext = fcinfo->context;
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0));
PG_RETURN_TSQUERY(parse_tsquery(in,
pushval_asis,
PointerGetDatum(NULL),
0,
escontext));
}
/*