1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-11 20:28:21 +03:00

tableam: sample scan.

This moves sample scan support to below tableam. It's not optional as
there is, in contrast to e.g. bitmap heap scans, no alternative way to
perform tablesample queries. If an AM can't deal with the block based
API, it will have to throw an ERROR.

The tableam callbacks for this are block based, but given the current
TsmRoutine interface, that seems to be required.

The new interface doesn't require TsmRoutines to perform visibility
checks anymore - that requires the TsmRoutine to know details about
the AM, which we want to avoid.  To continue to allow taking the
returned number of tuples account SampleScanState now has a donetuples
field (which previously e.g. existed in SystemRowsSamplerData), which
is only incremented after the visibility check succeeds.

Author: Andres Freund
Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
This commit is contained in:
Andres Freund
2019-03-30 20:18:53 -07:00
parent 4bb50236eb
commit 73c954d248
10 changed files with 382 additions and 312 deletions

View File

@ -26,7 +26,6 @@
#include <math.h>
#include "access/heapam.h"
#include "access/relscan.h"
#include "access/tsmapi.h"
#include "catalog/pg_type.h"
@ -66,7 +65,7 @@ static void system_time_beginsamplescan(SampleScanState *node,
Datum *params,
int nparams,
uint32 seed);
static BlockNumber system_time_nextsampleblock(SampleScanState *node);
static BlockNumber system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
static OffsetNumber system_time_nextsampletuple(SampleScanState *node,
BlockNumber blockno,
OffsetNumber maxoffset);
@ -213,11 +212,9 @@ system_time_beginsamplescan(SampleScanState *node,
* Uses linear probing algorithm for picking next block.
*/
static BlockNumber
system_time_nextsampleblock(SampleScanState *node)
system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
{
SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
TableScanDesc scan = node->ss.ss_currentScanDesc;
HeapScanDesc hscan = (HeapScanDesc) scan;
instr_time cur_time;
/* First call within scan? */
@ -230,14 +227,14 @@ system_time_nextsampleblock(SampleScanState *node)
SamplerRandomState randstate;
/* If relation is empty, there's nothing to scan */
if (hscan->rs_nblocks == 0)
if (nblocks == 0)
return InvalidBlockNumber;
/* We only need an RNG during this setup step */
sampler_random_init_state(sampler->seed, randstate);
/* Compute nblocks/firstblock/step only once per query */
sampler->nblocks = hscan->rs_nblocks;
sampler->nblocks = nblocks;
/* Choose random starting block within the relation */
/* (Actually this is the predecessor of the first block visited) */
@ -273,7 +270,7 @@ system_time_nextsampleblock(SampleScanState *node)
{
/* Advance lb, using uint64 arithmetic to forestall overflow */
sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
} while (sampler->lb >= hscan->rs_nblocks);
} while (sampler->lb >= nblocks);
return sampler->lb;
}