diff --git a/src/backend/access/gin/ginpostinglist.c b/src/backend/access/gin/ginpostinglist.c index 48ccfafdd22..44aef77edb7 100644 --- a/src/backend/access/gin/ginpostinglist.c +++ b/src/backend/access/gin/ginpostinglist.c @@ -23,25 +23,32 @@ /* * For encoding purposes, item pointers are represented as 64-bit unsigned * integers. The lowest 11 bits represent the offset number, and the next - * lowest 32 bits are the block number. That leaves 17 bits unused, i.e. + * lowest 32 bits are the block number. That leaves 21 bits unused, i.e. * only 43 low bits are used. * + * 11 bits is enough for the offset number, because MaxHeapTuplesPerPage < + * 2^11 on all supported block sizes. We are frugal with the bits, because + * smaller integers use fewer bytes in the varbyte encoding, saving disk + * space. (If we get a new table AM in the future that wants to use the full + * range of possible offset numbers, we'll need to change this.) + * * These 43-bit integers are encoded using varbyte encoding. In each byte, * the 7 low bits contain data, while the highest bit is a continuation bit. * When the continuation bit is set, the next byte is part of the same - * integer, otherwise this is the last byte of this integer. 43 bits fit - * conveniently in at most 6 bytes when varbyte encoded (the 6th byte does - * not need a continuation bit, because we know the max size to be 43 bits): + * integer, otherwise this is the last byte of this integer. 43 bits need + * at most 7 bytes in this encoding: * * 0XXXXXXX * 1XXXXXXX 0XXXXYYY * 1XXXXXXX 1XXXXYYY 0YYYYYYY * 1XXXXXXX 1XXXXYYY 1YYYYYYY 0YYYYYYY * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY - * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY YYYYYYYY + * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY + * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0uuuuuuY * * X = bits used for offset number * Y = bits used for block number + * u = unused bit * * The bytes are in stored in little-endian order. * @@ -73,6 +80,9 @@ */ #define MaxHeapTuplesPerPageBits 11 +/* Max. number of bytes needed to encode the largest supported integer. */ +#define MaxBytesPerInteger 7 + static inline uint64 itemptr_to_uint64(const ItemPointer iptr) { @@ -126,33 +136,40 @@ decode_varbyte(unsigned char **ptr) unsigned char *p = *ptr; uint64 c; + /* 1st byte */ c = *(p++); val = c & 0x7F; if (c & 0x80) { + /* 2nd byte */ c = *(p++); val |= (c & 0x7F) << 7; if (c & 0x80) { + /* 3rd byte */ c = *(p++); val |= (c & 0x7F) << 14; if (c & 0x80) { + /* 4th byte */ c = *(p++); val |= (c & 0x7F) << 21; if (c & 0x80) { + /* 5th byte */ c = *(p++); val |= (c & 0x7F) << 28; if (c & 0x80) { + /* 6th byte */ c = *(p++); val |= (c & 0x7F) << 35; if (c & 0x80) { - /* last byte, no continuation bit */ + /* 7th byte, should not have continuation bit */ c = *(p++); val |= c << 42; + Assert((c & 0x80) == 0); } } } @@ -208,15 +225,15 @@ ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize, Assert(val > prev); - if (endptr - ptr >= 6) + if (endptr - ptr >= MaxBytesPerInteger) encode_varbyte(delta, &ptr); else { /* - * There are less than 6 bytes left. Have to check if the next + * There are less than 7 bytes left. Have to check if the next * item fits in that space before writing it out. */ - unsigned char buf[6]; + unsigned char buf[MaxBytesPerInteger]; unsigned char *p = buf; encode_varbyte(delta, &p); diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index 60d6d7be1b2..953905d1eff 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -12,6 +12,7 @@ SUBDIRS = \ test_bloomfilter \ test_ddl_deparse \ test_extensions \ + test_ginpostinglist \ test_integerset \ test_parser \ test_pg_dump \ diff --git a/src/test/modules/test_ginpostinglist/Makefile b/src/test/modules/test_ginpostinglist/Makefile new file mode 100644 index 00000000000..4d45ac999af --- /dev/null +++ b/src/test/modules/test_ginpostinglist/Makefile @@ -0,0 +1,21 @@ +# src/test/modules/test_ginpostinglist/Makefile + +MODULE_big = test_ginpostinglist +OBJS = test_ginpostinglist.o $(WIN32RES) +PGFILEDESC = "test_ginpostinglist - test code for src/backend/access/gin//ginpostinglist.c" + +EXTENSION = test_ginpostinglist +DATA = test_ginpostinglist--1.0.sql + +REGRESS = test_ginpostinglist + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_ginpostinglist +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_ginpostinglist/README b/src/test/modules/test_ginpostinglist/README new file mode 100644 index 00000000000..66684dd0da4 --- /dev/null +++ b/src/test/modules/test_ginpostinglist/README @@ -0,0 +1,2 @@ +test_ginpostinglist contains unit tests for the GIN posting list code in +src/backend/access/gin/ginpostinglist.c. diff --git a/src/test/modules/test_ginpostinglist/expected/test_ginpostinglist.out b/src/test/modules/test_ginpostinglist/expected/test_ginpostinglist.out new file mode 100644 index 00000000000..4d0beaecea9 --- /dev/null +++ b/src/test/modules/test_ginpostinglist/expected/test_ginpostinglist.out @@ -0,0 +1,19 @@ +CREATE EXTENSION test_ginpostinglist; +-- +-- All the logic is in the test_ginpostinglist() function. It will throw +-- a error if something fails. +-- +SELECT test_ginpostinglist(); +NOTICE: testing with (0, 1), (0, 2), max 14 bytes +NOTICE: encoded 2 item pointers to 10 bytes +NOTICE: testing with (0, 1), (0, 291), max 14 bytes +NOTICE: encoded 2 item pointers to 10 bytes +NOTICE: testing with (0, 1), (4294967294, 291), max 14 bytes +NOTICE: encoded 1 item pointers to 8 bytes +NOTICE: testing with (0, 1), (4294967294, 291), max 16 bytes +NOTICE: encoded 2 item pointers to 16 bytes + test_ginpostinglist +--------------------- + +(1 row) + diff --git a/src/test/modules/test_ginpostinglist/sql/test_ginpostinglist.sql b/src/test/modules/test_ginpostinglist/sql/test_ginpostinglist.sql new file mode 100644 index 00000000000..b8cab7accec --- /dev/null +++ b/src/test/modules/test_ginpostinglist/sql/test_ginpostinglist.sql @@ -0,0 +1,7 @@ +CREATE EXTENSION test_ginpostinglist; + +-- +-- All the logic is in the test_ginpostinglist() function. It will throw +-- a error if something fails. +-- +SELECT test_ginpostinglist(); diff --git a/src/test/modules/test_ginpostinglist/test_ginpostinglist--1.0.sql b/src/test/modules/test_ginpostinglist/test_ginpostinglist--1.0.sql new file mode 100644 index 00000000000..37396a48424 --- /dev/null +++ b/src/test/modules/test_ginpostinglist/test_ginpostinglist--1.0.sql @@ -0,0 +1,8 @@ +/* src/test/modules/test_ginpostinglist/test_ginpostinglist--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_ginpostinglist" to load this file. \quit + +CREATE FUNCTION test_ginpostinglist() +RETURNS pg_catalog.void STRICT +AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_ginpostinglist/test_ginpostinglist.c b/src/test/modules/test_ginpostinglist/test_ginpostinglist.c new file mode 100644 index 00000000000..bab073bcecc --- /dev/null +++ b/src/test/modules/test_ginpostinglist/test_ginpostinglist.c @@ -0,0 +1,96 @@ +/*-------------------------------------------------------------------------- + * + * test_ginpostinglist.c + * Test varbyte-encoding in ginpostinglist.c + * + * Copyright (c) 2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/test_ginpostinglist/test_ginpostinglist.c + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "fmgr.h" +#include "access/ginblock.h" +#include "access/gin_private.h" +#include "access/htup_details.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(test_ginpostinglist); + +/* + * Encodes a pair of TIDs, and decodes it back. The first TID is always + * (0, 1), the second one is formed from the blk/off arguments. The 'maxsize' + * argument is passed to ginCompressPostingList(); it can be used to test the + * overflow checks. + * + * The reason that we test a pair, instead of just a single TID, is that + * the GinPostingList stores the first TID as is, and the varbyte-encoding + * is only used for the deltas between TIDs. So testing a single TID would + * not exercise the varbyte encoding at all. + * + * This function prints NOTICEs to describe what is tested, and how large the + * resulting GinPostingList is. Any incorrect results, e.g. if the encode + + * decode round trip doesn't return the original input, are reported as + * ERRORs. + */ +static void +test_itemptr_pair(BlockNumber blk, OffsetNumber off, int maxsize) +{ + ItemPointerData orig_itemptrs[2]; + ItemPointer decoded_itemptrs; + GinPostingList *pl; + int nwritten; + int ndecoded; + + elog(NOTICE, "testing with (%u, %d), (%u, %d), max %d bytes", + 0, 1, blk, off, maxsize); + ItemPointerSet(&orig_itemptrs[0], 0, 1); + ItemPointerSet(&orig_itemptrs[1], blk, off); + + /* Encode, and decode it back */ + pl = ginCompressPostingList(orig_itemptrs, 2, maxsize, &nwritten); + elog(NOTICE, "encoded %d item pointers to %zu bytes", + nwritten, SizeOfGinPostingList(pl)); + + if (SizeOfGinPostingList(pl) > maxsize) + elog(ERROR, "overflow: result was %zu bytes, max %d", + SizeOfGinPostingList(pl), maxsize); + + decoded_itemptrs = ginPostingListDecode(pl, &ndecoded); + if (nwritten != ndecoded) + elog(NOTICE, "encoded %d itemptrs, %d came back", nwritten, ndecoded); + + /* Check the result */ + if (!ItemPointerEquals(&orig_itemptrs[0], &decoded_itemptrs[0])) + elog(ERROR, "mismatch on first itemptr: (%u, %d) vs (%u, %d)", + 0, 1, + ItemPointerGetBlockNumber(&decoded_itemptrs[0]), + ItemPointerGetOffsetNumber(&decoded_itemptrs[0])); + + if (ndecoded == 2 && + !ItemPointerEquals(&orig_itemptrs[0], &decoded_itemptrs[0])) + { + elog(ERROR, "mismatch on second itemptr: (%u, %d) vs (%u, %d)", + 0, 1, + ItemPointerGetBlockNumber(&decoded_itemptrs[0]), + ItemPointerGetOffsetNumber(&decoded_itemptrs[0])); + } +} + +/* + * SQL-callable entry point to perform all tests. + */ +Datum +test_ginpostinglist(PG_FUNCTION_ARGS) +{ + test_itemptr_pair(0, 2, 14); + test_itemptr_pair(0, MaxHeapTuplesPerPage, 14); + test_itemptr_pair(MaxBlockNumber, MaxHeapTuplesPerPage, 14); + test_itemptr_pair(MaxBlockNumber, MaxHeapTuplesPerPage, 16); + + PG_RETURN_VOID(); +} diff --git a/src/test/modules/test_ginpostinglist/test_ginpostinglist.control b/src/test/modules/test_ginpostinglist/test_ginpostinglist.control new file mode 100644 index 00000000000..e4f5a7ceadb --- /dev/null +++ b/src/test/modules/test_ginpostinglist/test_ginpostinglist.control @@ -0,0 +1,4 @@ +comment = 'Test code for ginpostinglist.c' +default_version = '1.0' +module_pathname = '$libdir/test_ginpostinglist' +relocatable = true