1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-10 17:42:29 +03:00

Simplify tape block format.

No more indirect blocks. The blocks form a linked list instead.

This saves some memory, because we don't need to have a buffer in memory to
hold the indirect block (or blocks). To reflect that, TAPE_BUFFER_OVERHEAD
is reduced from 3 to 1 buffer, which allows using more memory for building
the initial runs.

Reviewed by Peter Geoghegan and Robert Haas.

Discussion: https://www.postgresql.org/message-id/34678beb-938e-646e-db9f-a7def5c44ada%40iki.fi
This commit is contained in:
Heikki Linnakangas
2016-12-22 18:45:00 +02:00
parent b86515da1a
commit 01ec25631f
3 changed files with 218 additions and 478 deletions

View File

@@ -240,16 +240,16 @@ typedef enum
* Parameters for calculation of number of tapes to use --- see inittapes()
* and tuplesort_merge_order().
*
* In this calculation we assume that each tape will cost us about 3 blocks
* worth of buffer space (which is an underestimate for very large data
* volumes, but it's probably close enough --- see logtape.c).
* In this calculation we assume that each tape will cost us about 1 block's
* worth of buffer space. This ignores the overhead of all the other data
* structures needed for each tape, but it's probably close enough.
*
* MERGE_BUFFER_SIZE is how much data we'd like to read from each input
* tape during a preread cycle (see discussion at top of file).
*/
#define MINORDER 6 /* minimum merge order */
#define MAXORDER 500 /* maximum merge order */
#define TAPE_BUFFER_OVERHEAD (BLCKSZ * 3)
#define TAPE_BUFFER_OVERHEAD BLCKSZ
#define MERGE_BUFFER_SIZE (BLCKSZ * 32)
/*
@@ -1849,6 +1849,7 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
SortTuple *stup)
{
unsigned int tuplen;
size_t nmoved;
switch (state->status)
{
@@ -1948,10 +1949,13 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
* end of file; back up to fetch last tuple's ending length
* word. If seek fails we must have a completely empty file.
*/
if (!LogicalTapeBackspace(state->tapeset,
state->result_tape,
2 * sizeof(unsigned int)))
nmoved = LogicalTapeBackspace(state->tapeset,
state->result_tape,
2 * sizeof(unsigned int));
if (nmoved == 0)
return false;
else if (nmoved != 2 * sizeof(unsigned int))
elog(ERROR, "unexpected tape position");
state->eof_reached = false;
}
else
@@ -1960,31 +1964,34 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
* Back up and fetch previously-returned tuple's ending length
* word. If seek fails, assume we are at start of file.
*/
if (!LogicalTapeBackspace(state->tapeset,
state->result_tape,
sizeof(unsigned int)))
nmoved = LogicalTapeBackspace(state->tapeset,
state->result_tape,
sizeof(unsigned int));
if (nmoved == 0)
return false;
else if (nmoved != sizeof(unsigned int))
elog(ERROR, "unexpected tape position");
tuplen = getlen(state, state->result_tape, false);
/*
* Back up to get ending length word of tuple before it.
*/
if (!LogicalTapeBackspace(state->tapeset,
state->result_tape,
tuplen + 2 * sizeof(unsigned int)))
nmoved = LogicalTapeBackspace(state->tapeset,
state->result_tape,
tuplen + 2 * sizeof(unsigned int));
if (nmoved == tuplen + sizeof(unsigned int))
{
/*
* If that fails, presumably the prev tuple is the first
* in the file. Back up so that it becomes next to read
* in forward direction (not obviously right, but that is
* what in-memory case does).
* We backed up over the previous tuple, but there was no
* ending length word before it. That means that the prev
* tuple is the first tuple in the file. It is now the
* next to read in forward direction (not obviously right,
* but that is what the in-memory case does).
*/
if (!LogicalTapeBackspace(state->tapeset,
state->result_tape,
tuplen + sizeof(unsigned int)))
elog(ERROR, "bogus tuple length in backward scan");
return false;
}
else if (nmoved != tuplen + 2 * sizeof(unsigned int))
elog(ERROR, "bogus tuple length in backward scan");
}
tuplen = getlen(state, state->result_tape, false);
@@ -1994,9 +2001,10 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
* Note: READTUP expects we are positioned after the initial
* length word of the tuple, so back up to that point.
*/
if (!LogicalTapeBackspace(state->tapeset,
state->result_tape,
tuplen))
nmoved = LogicalTapeBackspace(state->tapeset,
state->result_tape,
tuplen);
if (nmoved != tuplen)
elog(ERROR, "bogus tuple length in backward scan");
READTUP(state, stup, state->result_tape, tuplen);
@@ -3183,11 +3191,10 @@ tuplesort_restorepos(Tuplesortstate *state)
state->eof_reached = state->markpos_eof;
break;
case TSS_SORTEDONTAPE:
if (!LogicalTapeSeek(state->tapeset,
state->result_tape,
state->markpos_block,
state->markpos_offset))
elog(ERROR, "tuplesort_restorepos failed");
LogicalTapeSeek(state->tapeset,
state->result_tape,
state->markpos_block,
state->markpos_offset);
state->eof_reached = state->markpos_eof;
break;
default: