1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-20 05:03:10 +03:00

Add json(b)_to_tsvector function

Jsonb has a complex nature so there isn't best-for-everything way to convert it
to tsvector for full text search. Current to_tsvector(json(b)) suggests to
convert only string values, but it's possible to index keys, numerics and even
booleans value. To solve that json(b)_to_tsvector has a second required
argument contained a list of desired types of json fields. Second argument is
a jsonb scalar or array right now with possibility to add new options in a
future.

Bump catalog version

Author: Dmitry Dolgov with some editorization by me
Reviewed by: Teodor Sigaev
Discussion: https://www.postgresql.org/message-id/CA+q6zcXJQbS1b4kJ_HeAOoOc=unfnOrUEL=KGgE32QKDww7d8g@mail.gmail.com
This commit is contained in:
Teodor Sigaev
2018-04-07 20:58:03 +03:00
parent 529ab7bd1f
commit 1c1791e000
10 changed files with 630 additions and 50 deletions

View File

@ -2324,6 +2324,86 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
-- json to tsvector with numeric values
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
to_tsvector
---------------------------------
'aaa':1 'bbb':3 'ccc':5 'ddd':4
(1 row)
-- json_to_tsvector
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"');
json_to_tsvector
----------------------------------------------------------------------------------------
'123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16
(1 row)
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"');
json_to_tsvector
--------------------------------
'b':2 'c':4 'd':6 'f':8 'g':10
(1 row)
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"');
json_to_tsvector
------------------
'aaa':1 'bbb':3
(1 row)
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"');
json_to_tsvector
------------------
'123':1 '456':3
(1 row)
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"');
json_to_tsvector
-------------------
'fals':3 'true':1
(1 row)
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]');
json_to_tsvector
---------------------------------
'123':5 '456':7 'aaa':1 'bbb':3
(1 row)
select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"');
json_to_tsvector
----------------------------------------------------------------------------------------
'123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16
(1 row)
select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"');
json_to_tsvector
--------------------------------
'b':2 'c':4 'd':6 'f':8 'g':10
(1 row)
select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"');
json_to_tsvector
------------------
'aaa':1 'bbb':3
(1 row)
select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"');
json_to_tsvector
------------------
'123':1 '456':3
(1 row)
select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"');
json_to_tsvector
-------------------
'fals':3 'true':1
(1 row)
select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]');
json_to_tsvector
---------------------------------
'123':5 '456':7 'aaa':1 'bbb':3
(1 row)
-- ts_vector corner cases
select to_tsvector('""'::json);
to_tsvector
@ -2349,6 +2429,48 @@ select to_tsvector('null'::json);
(1 row)
-- json_to_tsvector corner cases
select json_to_tsvector('""'::json, '"all"');
json_to_tsvector
------------------
(1 row)
select json_to_tsvector('{}'::json, '"all"');
json_to_tsvector
------------------
(1 row)
select json_to_tsvector('[]'::json, '"all"');
json_to_tsvector
------------------
(1 row)
select json_to_tsvector('null'::json, '"all"');
json_to_tsvector
------------------
(1 row)
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '""');
ERROR: wrong flag in flag array: ""
HINT: Possible values are: "string", "numeric", "boolean", "key" and "all"
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '{}');
ERROR: wrong flag type, only arrays and scalars are allowed
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '[]');
json_to_tsvector
------------------
(1 row)
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, 'null');
ERROR: flag array element is not a string
HINT: Possible values are: "string", "numeric", "boolean", "key" and "all"
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["all", null]');
ERROR: flag array element is not a string
HINT: Possible values are: "string", "numeric", "boolean", "key" and "all"
-- ts_headline for json
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
ts_headline

View File

@ -4122,6 +4122,86 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
-- jsonb to tsvector with numeric values
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
to_tsvector
---------------------------------
'aaa':1 'bbb':3 'ccc':5 'ddd':4
(1 row)
-- jsonb_to_tsvector
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"');
jsonb_to_tsvector
----------------------------------------------------------------------------------------
'123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16
(1 row)
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"');
jsonb_to_tsvector
--------------------------------
'b':2 'c':4 'd':6 'f':8 'g':10
(1 row)
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"');
jsonb_to_tsvector
-------------------
'aaa':1 'bbb':3
(1 row)
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"');
jsonb_to_tsvector
-------------------
'123':1 '456':3
(1 row)
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"');
jsonb_to_tsvector
-------------------
'fals':3 'true':1
(1 row)
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]');
jsonb_to_tsvector
---------------------------------
'123':5 '456':7 'aaa':1 'bbb':3
(1 row)
select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"');
jsonb_to_tsvector
----------------------------------------------------------------------------------------
'123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16
(1 row)
select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"');
jsonb_to_tsvector
--------------------------------
'b':2 'c':4 'd':6 'f':8 'g':10
(1 row)
select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"');
jsonb_to_tsvector
-------------------
'aaa':1 'bbb':3
(1 row)
select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"');
jsonb_to_tsvector
-------------------
'123':1 '456':3
(1 row)
select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"');
jsonb_to_tsvector
-------------------
'fals':3 'true':1
(1 row)
select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]');
jsonb_to_tsvector
---------------------------------
'123':5 '456':7 'aaa':1 'bbb':3
(1 row)
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
to_tsvector
@ -4147,6 +4227,48 @@ select to_tsvector('null'::jsonb);
(1 row)
-- jsonb_to_tsvector corner cases
select jsonb_to_tsvector('""'::jsonb, '"all"');
jsonb_to_tsvector
-------------------
(1 row)
select jsonb_to_tsvector('{}'::jsonb, '"all"');
jsonb_to_tsvector
-------------------
(1 row)
select jsonb_to_tsvector('[]'::jsonb, '"all"');
jsonb_to_tsvector
-------------------
(1 row)
select jsonb_to_tsvector('null'::jsonb, '"all"');
jsonb_to_tsvector
-------------------
(1 row)
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '""');
ERROR: wrong flag in flag array: ""
HINT: Possible values are: "string", "numeric", "boolean", "key" and "all"
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '{}');
ERROR: wrong flag type, only arrays and scalars are allowed
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '[]');
jsonb_to_tsvector
-------------------
(1 row)
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, 'null');
ERROR: flag array element is not a string
HINT: Possible values are: "string", "numeric", "boolean", "key" and "all"
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["all", null]');
ERROR: flag array element is not a string
HINT: Possible values are: "string", "numeric", "boolean", "key" and "all"
-- ts_headline for jsonb
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
ts_headline

View File

@ -763,12 +763,42 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- json to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
-- json to tsvector with numeric values
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
-- json_to_tsvector
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"');
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"');
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"');
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"');
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"');
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]');
select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"');
select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"');
select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"');
select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"');
select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"');
select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]');
-- ts_vector corner cases
select to_tsvector('""'::json);
select to_tsvector('{}'::json);
select to_tsvector('[]'::json);
select to_tsvector('null'::json);
-- json_to_tsvector corner cases
select json_to_tsvector('""'::json, '"all"');
select json_to_tsvector('{}'::json, '"all"');
select json_to_tsvector('[]'::json, '"all"');
select json_to_tsvector('null'::json, '"all"');
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '""');
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '{}');
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '[]');
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, 'null');
select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["all", null]');
-- ts_headline for json
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));

View File

@ -1089,12 +1089,42 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- jsonb to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
-- jsonb to tsvector with numeric values
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
-- jsonb_to_tsvector
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"');
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"');
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"');
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"');
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"');
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]');
select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"');
select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"');
select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"');
select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"');
select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"');
select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]');
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
select to_tsvector('{}'::jsonb);
select to_tsvector('[]'::jsonb);
select to_tsvector('null'::jsonb);
-- jsonb_to_tsvector corner cases
select jsonb_to_tsvector('""'::jsonb, '"all"');
select jsonb_to_tsvector('{}'::jsonb, '"all"');
select jsonb_to_tsvector('[]'::jsonb, '"all"');
select jsonb_to_tsvector('null'::jsonb, '"all"');
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '""');
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '{}');
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '[]');
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, 'null');
select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["all", null]');
-- ts_headline for jsonb
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));