mirror of
https://github.com/postgres/postgres.git
synced 2025-09-02 04:21:28 +03:00
Fix parsing of complex morphs to tsquery
When to_tsquery() or websearch_to_tsquery() meets a complex morph containing multiple words residing in adjacent positions, these words are connected with the OP_AND operator. That leads to surprising results. For instance, both websearch_to_tsquery('"pg_class pg"') and to_tsquery('pg_class <-> pg') produce the '( pg & class ) <-> pg' tsquery. This tsquery requires the 'pg' and 'class' words to reside at the same position and doesn't match to_tsvector('pg_class pg'). It appears to be ridiculous behavior, which needs to be fixed. This commit makes to_tsquery() and websearch_to_tsquery() connect words residing in adjacent positions with OP_PHRASE. Therefore, those words are now normally chained with the other OP_PHRASE operators. The examples above now produce the 'pg <-> class <-> pg' tsquery, which matches to_tsvector('pg_class pg'). Another effect of this commit is that complex morph word positions now need to match the tsvector even if there is no surrounding OP_PHRASE. This behavior change generally looks like an improvement, but it makes this commit not backpatchable. Reported-by: Barry Pederson Bug: #16592 Discussion: https://postgr.es/m/16592-70b110ff9731c07d@postgresql.org Discussion: https://postgr.es/m/CAPpHfdv0EzVhf6CWfB1_TTZqXV_2Sn-jSY3zSd7ePH%3D-%2B1V2DQ%40mail.gmail.com Author: Alexander Korotkov Reviewed-by: Tom Lane, Neil Chen
This commit is contained in:
@@ -20,10 +20,20 @@
|
||||
#include "utils/jsonfuncs.h"
|
||||
|
||||
|
||||
/*
|
||||
* Opaque data structure, which is passed by parse_tsquery() to pushval_morph().
|
||||
*/
|
||||
typedef struct MorphOpaque
|
||||
{
|
||||
Oid cfg_id;
|
||||
int qoperator; /* query operator */
|
||||
|
||||
/*
|
||||
* Single tsquery morph could be parsed into multiple words. When these
|
||||
* words reside in adjacent positions, they are connected using this
|
||||
* operator. Usually, that is OP_PHRASE, which requires word positions of
|
||||
* a complex morph to exactly match the tsvector.
|
||||
*/
|
||||
int qoperator;
|
||||
} MorphOpaque;
|
||||
|
||||
typedef struct TSVectorBuildState
|
||||
@@ -573,7 +583,14 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
|
||||
MorphOpaque data;
|
||||
|
||||
data.cfg_id = PG_GETARG_OID(0);
|
||||
data.qoperator = OP_AND;
|
||||
|
||||
/*
|
||||
* Passing OP_PHRASE as a qoperator makes tsquery require the word
|
||||
* positions of a complex morph to exactly match the tsvector. Also, when
|
||||
* the complex morphs are connected with OP_PHRASE operator, we connect
|
||||
* all their words into the OP_PHRASE sequence.
|
||||
*/
|
||||
data.qoperator = OP_PHRASE;
|
||||
|
||||
query = parse_tsquery(text_to_cstring(in),
|
||||
pushval_morph,
|
||||
@@ -603,6 +620,12 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
|
||||
MorphOpaque data;
|
||||
|
||||
data.cfg_id = PG_GETARG_OID(0);
|
||||
|
||||
/*
|
||||
* parse_tsquery() with P_TSQ_PLAIN flag takes the whole input text as a
|
||||
* single morph. Passing OP_AND as a qoperator makes tsquery require
|
||||
* matching of all words independently of their positions.
|
||||
*/
|
||||
data.qoperator = OP_AND;
|
||||
|
||||
query = parse_tsquery(text_to_cstring(in),
|
||||
@@ -634,6 +657,12 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
|
||||
MorphOpaque data;
|
||||
|
||||
data.cfg_id = PG_GETARG_OID(0);
|
||||
|
||||
/*
|
||||
* parse_tsquery() with P_TSQ_PLAIN flag takes the whole input text as a
|
||||
* single morph. Passing OP_PHRASE as a qoperator makes tsquery require
|
||||
* matching of word positions.
|
||||
*/
|
||||
data.qoperator = OP_PHRASE;
|
||||
|
||||
query = parse_tsquery(text_to_cstring(in),
|
||||
@@ -665,7 +694,13 @@ websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
|
||||
|
||||
data.cfg_id = PG_GETARG_OID(0);
|
||||
|
||||
data.qoperator = OP_AND;
|
||||
/*
|
||||
* Passing OP_PHRASE as a qoperator makes tsquery require the word
|
||||
* positions of a complex morph to exactly match the tsvector. Also, when
|
||||
* the complex morphs are given in quotes, we connect all their words into
|
||||
* the OP_PHRASE sequence.
|
||||
*/
|
||||
data.qoperator = OP_PHRASE;
|
||||
|
||||
query = parse_tsquery(text_to_cstring(in),
|
||||
pushval_morph,
|
||||
|
Reference in New Issue
Block a user