mirror of
https://github.com/postgres/postgres.git
synced 2025-10-24 01:29:19 +03:00
Commit 9e43e8714 turns out to have been insufficient: not only is it
necessary to track tentative parent links while considering a set of
arc removals, but it's necessary to track tentative flag additions
as well. This is because we always merge arc target states into
arc source states; therefore, when considering a merge of the final
state with some other state, it is that other state that will acquire a
new TSTATE_FIN bit. If there's another arc for the same color trigram
that would cause merging of that state with the initial state, we
failed to recognize the problem. The test cases for the prior commit
evidently only exercised situations where a tentative merge with the
initial state occurs before one with the final state. If it goes the
other way around, we'll happily merge the initial and final states,
either producing a broken final graph that would never match anything,
or triggering the Assert added by the prior commit.
It's tempting to consider switching the merge direction when the merge
involves the final state, but I lack the time to analyze that idea in
detail. Instead just keep track of the flag changes that would result
from proposed merges, in the same way that the prior commit tracked
proposed parent links.
Along the way, add some more debugging support, because I'm not entirely
confident that this is the last bug here. And tweak matters so that
the transformed.dot file uses small integers rather than pointer values
to identify states; that makes it more readable if you're just eyeballing
it rather than fooling with Graphviz. And rename a couple of identically
named struct fields to reduce confusion.
Per report from Corey Csuhta. Add a test case based on his example.
(Note: this case does not trigger the bug under 9.3, apparently because
its different measurement of costs causes it to stop merging states before
it hits the failure. I spent some time trying to find a variant that would
fail in 9.3, without success; but I'm sure such cases exist.)
Like the previous patch, back-patch to 9.3 where this code was added.
Report: https://postgr.es/m/E2B01A4B-4530-406B-8D17-2F67CF9A16BA@csuhta.com
125 lines
5.1 KiB
SQL
125 lines
5.1 KiB
SQL
-- Load the pg_trgm extension before any of its functions are called.
CREATE EXTENSION pg_trgm;
|
|
|
|
-- show_trgm() on literal inputs: an empty string, a punctuation-only string,
-- single-letter words with and without surrounding spaces, mixed-case words,
-- and a string mixing letters, a digit and punctuation.
select show_trgm('');
|
|
select show_trgm('(*&^$@%@');
|
|
select show_trgm('a b c');
|
|
select show_trgm(' a b c ');
|
|
select show_trgm('aA bB cC');
|
|
select show_trgm(' aA bB cC ');
|
|
select show_trgm('a b C0*%^');
|
|
|
|
-- similarity() between literals that differ in letter case, in trailing
-- characters, and in surrounding spaces.
select similarity('wow','WOWa ');
|
|
select similarity('wow',' WOW ');
|
|
|
|
-- similarity() where both arguments consist only of punctuation characters.
select similarity('---', '####---');
|
|
|
|
-- Single-column text table that the queries below run against.
CREATE TABLE test_trgm(t text);
|
|
|
|
-- psql client-side copy: populate the table from the data/trgm.data file.
\copy test_trgm from 'data/trgm.data'
|
|
|
|
-- No index has been created on test_trgm yet at this point in the script.
-- Rows matching each probe string via the % operator, with their similarity()
-- score; ordering by score descending and then by t keeps the output order
-- deterministic.
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
|
|
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
|
|
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
|
|
-- The two rows with the smallest <-> value against the probe string.
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
|
|
-- Count of rows matching a regular expression with a character class,
-- repetition and an optional hyphen.
select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';
|
|
|
|
-- Build a GiST index on test_trgm.t using the gist_trgm_ops operator class.
create index trgm_idx on test_trgm using gist (t gist_trgm_ops);
|
|
-- Disable enable_seqscan; presumably so the planner prefers the new index
-- over a sequential scan -- confirm against the expected output.
set enable_seqscan=off;
|
|
|
|
-- Same three % / similarity() queries as were run before the index existed.
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
|
|
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
|
|
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
|
|
-- Show the plan (without cost figures) for the <-> ordering query, then run
-- that same query.
explain (costs off)
|
|
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
|
|
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
|
|
-- Same regular-expression count as was run before the index existed.
select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';
|
|
|
|
-- Replace the GiST index with a GIN index of the same name on the same
-- column, this time using the gin_trgm_ops operator class.
drop index trgm_idx;
|
|
create index trgm_idx on test_trgm using gin (t gin_trgm_ops);
|
|
-- enable_seqscan is set to off again (it was already set off earlier in the
-- script).
set enable_seqscan=off;
|
|
|
|
-- Same three % / similarity() queries as in the earlier sections. The <->
-- ordering queries are not repeated in this section.
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
|
|
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
|
|
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
|
|
-- Same regular-expression count as in the earlier sections.
select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';
|
|
|
|
-- Second fixture: a single-column text table filled with four literal rows.
create table test2(t text);
|
|
insert into test2 values ('abcdef');
|
|
insert into test2 values ('quark');
|
|
insert into test2 values (' z foo bar');
|
|
insert into test2 values ('/123/-45/');
|
|
-- GIN index on test2.t using the gin_trgm_ops operator class.
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
|
|
set enable_seqscan=off;
|
|
-- Plans (without cost figures) for a LIKE and an ILIKE pattern search.
explain (costs off)
|
|
select * from test2 where t like '%BCD%';
|
|
explain (costs off)
|
|
select * from test2 where t ilike '%BCD%';
|
|
-- LIKE / ILIKE searches: upper- and lower-case patterns, a pattern written
-- as an E'' string containing a doubled backslash, a prefix pattern, and
-- patterns containing spaces.
select * from test2 where t like '%BCD%';
|
|
select * from test2 where t like '%bcd%';
|
|
select * from test2 where t like E'%\\bcd%';
|
|
select * from test2 where t ilike '%BCD%';
|
|
select * from test2 where t ilike 'qua%';
|
|
select * from test2 where t like '%z foo bar%';
|
|
select * from test2 where t like ' z foo%';
|
|
-- Plans (without cost figures) for a ~ and a ~* regular-expression search.
explain (costs off)
|
|
select * from test2 where t ~ '[abc]{3}';
|
|
explain (costs off)
|
|
select * from test2 where t ~* 'DEF';
|
|
-- Regular-expression searches using ~ and ~*: character classes, repetition,
-- anchors, alternation, nested bounded repetition, patterns containing
-- spaces, a negative lookahead, and \d escapes.
select * from test2 where t ~ '[abc]{3}';
|
|
select * from test2 where t ~ 'a[bc]+d';
|
|
select * from test2 where t ~ '(abc)*$';
|
|
select * from test2 where t ~* 'DEF';
|
|
select * from test2 where t ~ 'dEf';
|
|
select * from test2 where t ~* '^q';
|
|
select * from test2 where t ~* '[abc]{3}[def]{3}';
|
|
select * from test2 where t ~* 'ab[a-z]{3}';
|
|
select * from test2 where t ~* '(^| )qua';
|
|
select * from test2 where t ~ 'q.*rk$';
|
|
select * from test2 where t ~ 'q';
|
|
select * from test2 where t ~ '[a-z]{3}';
|
|
select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
|
|
select * from test2 where t ~ 'z foo bar';
|
|
select * from test2 where t ~ ' z foo bar';
|
|
-- NOTE(review): as shown here this statement is textually identical to the
-- previous one; the whitespace inside the literal may differ in the upstream
-- file -- confirm before relying on it.
select * from test2 where t ~ ' z foo bar';
|
|
select * from test2 where t ~ ' z foo';
|
|
select * from test2 where t ~ 'qua(?!foo)';
|
|
select * from test2 where t ~ '/\d+/-\d';
|
|
-- Swap the GIN index on test2 for a GiST index using gist_trgm_ops; the
-- statements after this point repeat the LIKE / ILIKE / regular-expression
-- statements that were run with the GIN index.
drop index test2_idx_gin;
|
|
|
|
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
|
|
set enable_seqscan=off;
|
|
-- Plans (without cost figures) for a LIKE and an ILIKE pattern search.
explain (costs off)
|
|
select * from test2 where t like '%BCD%';
|
|
explain (costs off)
|
|
select * from test2 where t ilike '%BCD%';
|
|
-- LIKE / ILIKE searches: upper- and lower-case patterns, a pattern written
-- as an E'' string containing a doubled backslash, a prefix pattern, and
-- patterns containing spaces.
select * from test2 where t like '%BCD%';
|
|
select * from test2 where t like '%bcd%';
|
|
select * from test2 where t like E'%\\bcd%';
|
|
select * from test2 where t ilike '%BCD%';
|
|
select * from test2 where t ilike 'qua%';
|
|
select * from test2 where t like '%z foo bar%';
|
|
select * from test2 where t like ' z foo%';
|
|
-- Plans (without cost figures) for a ~ and a ~* regular-expression search.
explain (costs off)
|
|
select * from test2 where t ~ '[abc]{3}';
|
|
explain (costs off)
|
|
select * from test2 where t ~* 'DEF';
|
|
-- Regular-expression searches using ~ and ~*: character classes, repetition,
-- anchors, alternation, nested bounded repetition, patterns containing
-- spaces, a negative lookahead, and \d escapes.
select * from test2 where t ~ '[abc]{3}';
|
|
select * from test2 where t ~ 'a[bc]+d';
|
|
select * from test2 where t ~ '(abc)*$';
|
|
select * from test2 where t ~* 'DEF';
|
|
select * from test2 where t ~ 'dEf';
|
|
select * from test2 where t ~* '^q';
|
|
select * from test2 where t ~* '[abc]{3}[def]{3}';
|
|
select * from test2 where t ~* 'ab[a-z]{3}';
|
|
select * from test2 where t ~* '(^| )qua';
|
|
select * from test2 where t ~ 'q.*rk$';
|
|
select * from test2 where t ~ 'q';
|
|
select * from test2 where t ~ '[a-z]{3}';
|
|
select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
|
|
select * from test2 where t ~ 'z foo bar';
|
|
select * from test2 where t ~ ' z foo bar';
|
|
-- NOTE(review): as shown here this statement is textually identical to the
-- previous one; the whitespace inside the literal may differ in the upstream
-- file -- confirm before relying on it.
select * from test2 where t ~ ' z foo bar';
|
|
select * from test2 where t ~ ' z foo';
|
|
select * from test2 where t ~ 'qua(?!foo)';
|
|
select * from test2 where t ~ '/\d+/-\d';
|