1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

amcheck: Add gin_index_check() to verify GIN index

Adds a new function, validating two kinds of invariants on a GIN index:

- parent-child consistency: Paths in a GIN graph have to contain
  consistent keys. Tuples on parent pages consistently include tuples
  from child pages; parent tuples do not require any adjustments.

- balanced-tree / graph: Each internal page has at least one downlink,
  and can reference either only leaf pages or only internal pages.

The GIN verification is based on work by Grigory Kryachko, reworked by
Heikki Linnakangas and with various improvements by Andrey Borodin.
Investigation and fixes for multiple bugs by Kirill Reshke.

Author: Grigory Kryachko <GSKryachko@gmail.com>
Author: Heikki Linnakangas <hlinnaka@iki.fi>
Author: Andrey Borodin <amborodin@acm.org>
Reviewed-By: José Villanova <jose.arthur@gmail.com>
Reviewed-By: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-By: Nikolay Samokhvalov <samokhvalov@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com>
Reviewed-By: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-By: Mark Dilger <mark.dilger@enterprisedb.com>
Reviewed-By: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
This commit is contained in:
Tomas Vondra
2025-03-29 15:43:55 +01:00
parent 53a2a1564a
commit 14ffaece0f
9 changed files with 946 additions and 3 deletions

View File

@ -0,0 +1,40 @@
-- Test of index bulk load
SELECT setseed(1);
CREATE TABLE "gin_check"("Column1" int[]);
-- posting trees (frequently used entries)
INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
-- posting leaves (sparse entries)
INSERT INTO gin_check select array_agg(255 + round(random()*100)) from generate_series(1, 100) as i group by i % 100;
CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
SELECT gin_index_check('gin_check_idx');
-- cleanup
DROP TABLE gin_check;
-- Test index inserts
SELECT setseed(1);
CREATE TABLE "gin_check"("Column1" int[]);
CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
ALTER INDEX gin_check_idx SET (fastupdate = false);
-- posting trees
INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
-- posting leaves
INSERT INTO gin_check select array_agg(100 + round(random()*255)) from generate_series(1, 100) as i group by i % 100;
SELECT gin_index_check('gin_check_idx');
-- cleanup
DROP TABLE gin_check;
-- Test GIN over text array
SELECT setseed(1);
CREATE TABLE "gin_check_text_array"("Column1" text[]);
-- posting trees
INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300)::text)::text) from generate_series(1, 100000) as i group by i % 10000;
-- posting leaves
INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300 + 300)::text)::text) from generate_series(1, 10000) as i group by i % 100;
CREATE INDEX gin_check_text_array_idx on "gin_check_text_array" USING GIN("Column1");
SELECT gin_index_check('gin_check_text_array_idx');
-- cleanup
DROP TABLE gin_check_text_array;