mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			172 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			172 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
This is an implementation of RD-tree data structure using GiST interface
 | 
						|
of PostgreSQL. It has built-in lossy compression.
 | 
						|
 | 
						|
Current implementation provides index support for one-dimensional array of
 | 
						|
int4's - gist__int_ops, suitable for small and medium size of arrays (used on
 | 
						|
default), and gist__intbig_ops for indexing large arrays (we use superimposed
 | 
						|
signature with length of 4096 bits to represent sets).
 | 
						|
 | 
						|
All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov
 | 
						|
(oleg@sai.msu.su). See http://www.sai.msu.su/~megera/postgres/gist
 | 
						|
for additional information. Andrey Oktyabrski did a great work on 
 | 
						|
adding new functions and operations.
 | 
						|
 | 
						|
 | 
						|
FUNCTIONS:
 | 
						|
 | 
						|
  int   icount(int[]) - the number of elements in intarray
 | 
						|
 | 
						|
test=# select icount('{1,2,3}'::int[]);   
 | 
						|
 icount 
 | 
						|
--------
 | 
						|
      3
 | 
						|
(1 row)
 | 
						|
 | 
						|
  int[] sort(int[], 'asc' | 'desc') - sort intarray
 | 
						|
 | 
						|
test=# select sort('{1,2,3}'::int[],'desc');
 | 
						|
  sort   
 | 
						|
---------
 | 
						|
 {3,2,1}
 | 
						|
(1 row)
 | 
						|
 | 
						|
  int[] sort(int[]) - sort in ascending order
 | 
						|
  int[] sort_asc(int[]),sort_desc(int[]) - shortcuts for sort 
 | 
						|
 | 
						|
  int[] uniq(int[]) - returns unique elements
 | 
						|
 | 
						|
test=# select uniq(sort('{1,2,3,2,1}'::int[]));
 | 
						|
  uniq   
 | 
						|
---------
 | 
						|
 {1,2,3}
 | 
						|
(1 row)
 | 
						|
 | 
						|
  int   idx(int[], int item) - returns index of first intarray matching element to item, or
 | 
						|
                              '0' if matching failed.
 | 
						|
 | 
						|
test=# select idx('{1,2,3,2,1}'::int[],2);
 | 
						|
 idx 
 | 
						|
-----
 | 
						|
   2
 | 
						|
(1 row)
 | 
						|
 | 
						|
 | 
						|
  int[] subarray(int[],int START [, int LEN]) - returns part of intarray starting from
 | 
						|
                                                element number START (from 1) and length LEN. 
 | 
						|
 | 
						|
test=# select subarray('{1,2,3,2,1}'::int[],2,3);
 | 
						|
 subarray 
 | 
						|
----------
 | 
						|
 {2,3,2}
 | 
						|
(1 row)
 | 
						|
 | 
						|
 | 
						|
OPERATIONS:
 | 
						|
 | 
						|
  int[] && int[]  - overlap - returns TRUE if arrays has at least one common elements.
 | 
						|
  int[] @  int[]  - contains - returns TRUE if left array contains right array
 | 
						|
  int[] ~ int[]   - contained - returns TRUE if left array is contained in right array
 | 
						|
  # int[]         - return the number of elements in array
 | 
						|
  int[] + int     - push element to array ( add to end of array)
 | 
						|
  int[] + int[]   - merge of arrays (right array added to the end of left one)
 | 
						|
  int[] - int     - remove entries matched by right argument from array
 | 
						|
  int[] - int[]   - remove right array from left
 | 
						|
  int[] | int     - returns intarray - union of arguments
 | 
						|
  int[] | int[]   - returns intarray as a union of two arrays
 | 
						|
  int[] & int[]   - returns intersection of arrays
 | 
						|
  int[] @@ query_int  - returns TRUE if array satisfies query (like '1&(2|3)') 
 | 
						|
  query_int ~~ int[]  - -/-
 | 
						|
 | 
						|
CHANGES:
 | 
						|
 | 
						|
August 6, 2002
 | 
						|
   1. Reworked patch from Andrey Oktyabrski (ano@spider.ru) with
 | 
						|
      functions: icount, sort, sort_asc, uniq, idx, subarray
 | 
						|
      operations: #, +, -, |, &
 | 
						|
October 1, 2001
 | 
						|
   1. Change search method in array to binary
 | 
						|
September 28, 2001
 | 
						|
   1. gist__int_ops now is without lossy
 | 
						|
   2. add sort entry in picksplit
 | 
						|
September 21, 2001
 | 
						|
   1. Added support for boolean query (indexable operator @@, looks like
 | 
						|
      a @@ '1|(2&3)', perfomance is better in any case )
 | 
						|
   2. Done some small optimizations
 | 
						|
March 19, 2001
 | 
						|
   1. Added support for toastable keys
 | 
						|
   2. Improved split algorithm for intbig (selection speedup is about 30%)
 | 
						|
 | 
						|
INSTALLATION:
 | 
						|
 | 
						|
  gmake
 | 
						|
  gmake install
 | 
						|
  -- load functions
 | 
						|
  psql <database> < _int.sql 
 | 
						|
 | 
						|
REGRESSION TEST:
 | 
						|
 | 
						|
   gmake installcheck
 | 
						|
 | 
						|
EXAMPLE USAGE:
 | 
						|
 | 
						|
  create table message (mid int not null,sections int[]);
 | 
						|
  create table message_section_map (mid int not null,sid int not null);
 | 
						|
 | 
						|
  -- create indices
 | 
						|
CREATE unique index message_key on message ( mid );
 | 
						|
CREATE unique index message_section_map_key2 on message_section_map (sid, mid );
 | 
						|
CREATE INDEX message_rdtree_idx on message using gist ( sections gist__int_ops);
 | 
						|
 | 
						|
  -- select some messages with section in 1 OR 2 - OVERLAP operator
 | 
						|
  select message.mid from message where message.sections && '{1,2}';  
 | 
						|
 | 
						|
  -- select messages contains in sections 1 AND 2 - CONTAINS operator
 | 
						|
  select message.mid from message where message.sections @ '{1,2}';
 | 
						|
  -- the same, CONTAINED operator
 | 
						|
  select message.mid from message where '{1,2}' ~ message.sections;
 | 
						|
 | 
						|
BENCHMARK:
 | 
						|
 | 
						|
  subdirectory bench contains benchmark suite.
 | 
						|
  cd ./bench
 | 
						|
  1. createdb TEST
 | 
						|
  2. psql TEST < ../_int.sql
 | 
						|
  3. ./create_test.pl | psql TEST
 | 
						|
  4. ./bench.pl - perl script to benchmark queries, supports OR, AND queries
 | 
						|
                  with/without RD-Tree. Run script without arguments to 
 | 
						|
                  see availbale options.
 | 
						|
 | 
						|
     a)test without RD-Tree (OR)
 | 
						|
       ./bench.pl -d TEST -s 1,2 -v
 | 
						|
     b)test with RD-Tree 
 | 
						|
       ./bench.pl -d TEST -s 1,2 -v -r
 | 
						|
 | 
						|
BENCHMARKS:
 | 
						|
 | 
						|
Size of table <message>: 200000
 | 
						|
Size of table <message_section_map>: 268538 
 | 
						|
 | 
						|
Distribution of messages by sections:
 | 
						|
 | 
						|
section 0: 73899 messages
 | 
						|
section 1: 16298 messages
 | 
						|
section 50: 1241 messages
 | 
						|
section 99: 705 messages
 | 
						|
 | 
						|
old - without RD-Tree support,
 | 
						|
new - with RD-Tree
 | 
						|
 | 
						|
+----------+---------------+----------------+
 | 
						|
|Search set|OR, time in sec|AND, time in sec|
 | 
						|
|          +-------+-------+--------+-------+
 | 
						|
|          |  old  |  new  |   old  |  new  |
 | 
						|
+----------+-------+-------+--------+-------+
 | 
						|
|         1|  1.427|  0.215|       -|      -|
 | 
						|
+----------+-------+-------+--------+-------+
 | 
						|
|        99|  1.029|  0.018|       -|      -|
 | 
						|
+----------+-------+-------+--------+-------+
 | 
						|
|       1,2|  1.829|  0.334|   5.654|  0.042|
 | 
						|
+----------+-------+-------+--------+-------+
 | 
						|
| 1,2,50,60|  2.057|  0.359|   5.044|  0.007|
 | 
						|
+----------+-------+-------+--------+-------+
 |