mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-29 22:49:41 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			185 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			185 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| This is an implementation of RD-tree data structure using GiST interface
 | |
| of PostgreSQL. It has built-in lossy compression.
 | |
| 
 | |
| Current implementation provides index support for one-dimensional array of
 | |
| int4's - gist__int_ops, suitable for small and medium size of arrays (used on
 | |
| default), and gist__intbig_ops for indexing large arrays (we use superimposed
 | |
| signature with length of 4096 bits to represent sets).
 | |
| 
 | |
| All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov
 | |
| (oleg@sai.msu.su). See http://www.sai.msu.su/~megera/postgres/gist
 | |
| for additional information. Andrey Oktyabrski did a great work on 
 | |
| adding new functions and operations.
 | |
| 
 | |
| 
 | |
| FUNCTIONS:
 | |
| 
 | |
|   int   icount(int[]) - the number of elements in intarray
 | |
| 
 | |
| test=# select icount('{1,2,3}'::int[]);   
 | |
|  icount 
 | |
| --------
 | |
|       3
 | |
| (1 row)
 | |
| 
 | |
|   int[] sort(int[], 'asc' | 'desc') - sort intarray
 | |
| 
 | |
| test=# select sort('{1,2,3}'::int[],'desc');
 | |
|   sort   
 | |
| ---------
 | |
|  {3,2,1}
 | |
| (1 row)
 | |
| 
 | |
|   int[] sort(int[]) - sort in ascending order
 | |
|   int[] sort_asc(int[]),sort_desc(int[]) - shortcuts for sort 
 | |
| 
 | |
|   int[] uniq(int[]) - returns unique elements
 | |
| 
 | |
| test=# select uniq(sort('{1,2,3,2,1}'::int[]));
 | |
|   uniq   
 | |
| ---------
 | |
|  {1,2,3}
 | |
| (1 row)
 | |
| 
 | |
|   int   idx(int[], int item) - returns index of first intarray matching element to item, or
 | |
|                               '0' if matching failed.
 | |
| 
 | |
| test=# select idx('{1,2,3,2,1}'::int[],2);
 | |
|  idx 
 | |
| -----
 | |
|    2
 | |
| (1 row)
 | |
| 
 | |
| 
 | |
|   int[] subarray(int[],int START [, int LEN]) - returns part of intarray starting from
 | |
|                                                 element number START (from 1) and length LEN. 
 | |
| 
 | |
| test=# select subarray('{1,2,3,2,1}'::int[],2,3);
 | |
|  subarray 
 | |
| ----------
 | |
|  {2,3,2}
 | |
| (1 row)
 | |
| 
 | |
|   int[] intset(int4) - casting int4 to int[]
 | |
| 
 | |
| test=# select intset(1);
 | |
|  intset 
 | |
| --------
 | |
|  {1}
 | |
| (1 row)
 | |
| 
 | |
| OPERATIONS:
 | |
| 
 | |
|   int[] && int[]  - overlap - returns TRUE if arrays have at least one common element
 | |
|   int[] @> int[]  - contains - returns TRUE if left array contains right array
 | |
|   int[] <@ int[]  - contained - returns TRUE if left array is contained in right array
 | |
|   # int[]         - returns the number of elements in array
 | |
|   int[] + int     - push element to array ( add to end of array)
 | |
|   int[] + int[]   - merge of arrays (right array added to the end of left one)
 | |
|   int[] - int     - remove entries matched by right argument from array
 | |
|   int[] - int[]   - remove right array from left
 | |
|   int[] | int     - returns intarray - union of arguments
 | |
|   int[] | int[]   - returns intarray as a union of two arrays
 | |
|   int[] & int[]   - returns intersection of arrays
 | |
|   int[] @@ query_int - returns TRUE if array satisfies query (like '1&(2|3)') 
 | |
|   query_int ~~ int[] - returns TRUE if array satisfies query (commutator of @@)
 | |
| 
 | |
| (Before PostgreSQL 8.2, the containment operators @> and <@ were
 | |
| respectively called @ and ~.  These names are still available, but are
 | |
| deprecated and will eventually be retired.  Notice that the old names
 | |
| are reversed from the convention formerly followed by the core geometric
 | |
| datatypes!)
 | |
| 
 | |
| CHANGES:
 | |
| 
 | |
| August 6, 2002
 | |
|    1. Reworked patch from Andrey Oktyabrski (ano@spider.ru) with
 | |
|       functions: icount, sort, sort_asc, uniq, idx, subarray
 | |
|       operations: #, +, -, |, &
 | |
| October 1, 2001
 | |
|    1. Change search method in array to binary
 | |
| September 28, 2001
 | |
|    1. gist__int_ops now is without lossy
 | |
|    2. add sort entry in picksplit
 | |
| September 21, 2001
 | |
|    1. Added support for boolean query (indexable operator @@, looks like
 | |
|       a @@ '1|(2&3)', perfomance is better in any case )
 | |
|    2. Done some small optimizations
 | |
| March 19, 2001
 | |
|    1. Added support for toastable keys
 | |
|    2. Improved split algorithm for intbig (selection speedup is about 30%)
 | |
| 
 | |
| INSTALLATION:
 | |
| 
 | |
|   gmake
 | |
|   gmake install
 | |
|   -- load functions
 | |
|   psql <database> < _int.sql 
 | |
| 
 | |
| REGRESSION TEST:
 | |
| 
 | |
|    gmake installcheck
 | |
| 
 | |
| EXAMPLE USAGE:
 | |
| 
 | |
|   create table message (mid int not null,sections int[]);
 | |
|   create table message_section_map (mid int not null,sid int not null);
 | |
| 
 | |
|   -- create indices
 | |
| CREATE unique index message_key on message ( mid );
 | |
| CREATE unique index message_section_map_key2 on message_section_map (sid, mid );
 | |
| CREATE INDEX message_rdtree_idx on message using gist ( sections gist__int_ops);
 | |
| 
 | |
|   -- select some messages with section in 1 OR 2 - OVERLAP operator
 | |
|   select message.mid from message where message.sections && '{1,2}';  
 | |
| 
 | |
|   -- select messages contains in sections 1 AND 2 - CONTAINS operator
 | |
|   select message.mid from message where message.sections @> '{1,2}';
 | |
|   -- the same, CONTAINED operator
 | |
|   select message.mid from message where '{1,2}' <@ message.sections;
 | |
| 
 | |
| BENCHMARK:
 | |
| 
 | |
|   subdirectory bench contains benchmark suite.
 | |
|   cd ./bench
 | |
|   1. createdb TEST
 | |
|   2. psql TEST < ../_int.sql
 | |
|   3. ./create_test.pl | psql TEST
 | |
|   4. ./bench.pl - perl script to benchmark queries, supports OR, AND queries
 | |
|                   with/without RD-Tree. Run script without arguments to 
 | |
|                   see availbale options.
 | |
| 
 | |
|      a)test without RD-Tree (OR)
 | |
|        ./bench.pl -d TEST -c -s 1,2 -v
 | |
|      b)test with RD-Tree 
 | |
|        ./bench.pl -d TEST -c -s 1,2 -v -r
 | |
| 
 | |
| BENCHMARKS:
 | |
| 
 | |
| Size of table <message>: 200000
 | |
| Size of table <message_section_map>: 269133 
 | |
| 
 | |
| Distribution of messages by sections:
 | |
| 
 | |
| section 0: 74377 messages
 | |
| section 1: 16284 messages
 | |
| section 50: 1229 messages
 | |
| section 99: 683 messages
 | |
| 
 | |
| old - without RD-Tree support,
 | |
| new - with RD-Tree
 | |
| 
 | |
| +----------+---------------+----------------+
 | |
| |Search set|OR, time in sec|AND, time in sec|
 | |
| |          +-------+-------+--------+-------+
 | |
| |          |  old  |  new  |   old  |  new  |
 | |
| +----------+-------+-------+--------+-------+
 | |
| |         1|  0.625|  0.101|       -|      -|
 | |
| +----------+-------+-------+--------+-------+
 | |
| |        99|  0.018|  0.017|       -|      -|
 | |
| +----------+-------+-------+--------+-------+
 | |
| |       1,2|  0.766|  0.133|   0.628|  0.045|
 | |
| +----------+-------+-------+--------+-------+
 | |
| | 1,2,50,65|  0.794|  0.141|   0.030|  0.006|
 | |
| +----------+-------+-------+--------+-------+
 |