1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-23 14:01:44 +03:00

Separate block sampling functions

Refactoring ahead of tablesample patch

Requested and reviewed by Michael Paquier

Petr Jelinek
This commit is contained in:
Simon Riggs
2015-05-15 04:02:54 +02:00
parent 5a3022fde0
commit 83e176ec18
7 changed files with 287 additions and 232 deletions

View File

@ -34,6 +34,7 @@
#include "optimizer/var.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/sampling.h"
PG_MODULE_MAGIC;
@ -1006,7 +1007,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
{
int numrows = 0;
double rowstoskip = -1; /* -1 means not set yet */
double rstate;
ReservoirStateData rstate;
TupleDesc tupDesc;
Datum *values;
bool *nulls;
@ -1044,7 +1045,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
ALLOCSET_DEFAULT_MAXSIZE);
/* Prepare for sampling rows */
rstate = anl_init_selection_state(targrows);
reservoir_init_selection_state(&rstate, targrows);
/* Set up callback to identify error line number. */
errcallback.callback = CopyFromErrorCallback;
@ -1088,7 +1089,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
* not-yet-incremented value of totalrows as t.
*/
if (rowstoskip < 0)
rowstoskip = anl_get_next_S(*totalrows, targrows, &rstate);
rowstoskip = reservoir_get_next_S(&rstate, *totalrows, targrows);
if (rowstoskip <= 0)
{
@ -1096,7 +1097,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
* Found a suitable tuple, so save it, replacing one old tuple
* at random
*/
int k = (int) (targrows * anl_random_fract());
int k = (int) (targrows * sampler_random_fract());
Assert(k >= 0 && k < targrows);
heap_freetuple(rows[k]);

View File

@ -37,6 +37,7 @@
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/sampling.h"
PG_MODULE_MAGIC;
@ -202,7 +203,7 @@ typedef struct PgFdwAnalyzeState
/* for random sampling */
double samplerows; /* # of rows fetched */
double rowstoskip; /* # of rows to skip before next sample */
double rstate; /* random state */
ReservoirStateData rstate; /* state for reservoir sampling*/
/* working memory contexts */
MemoryContext anl_cxt; /* context for per-analyze lifespan data */
@ -2411,7 +2412,7 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
astate.numrows = 0;
astate.samplerows = 0;
astate.rowstoskip = -1; /* -1 means not set yet */
astate.rstate = anl_init_selection_state(targrows);
reservoir_init_selection_state(&astate.rstate, targrows);
/* Remember ANALYZE context, and create a per-tuple temp context */
astate.anl_cxt = CurrentMemoryContext;
@ -2551,13 +2552,12 @@ analyze_row_processor(PGresult *res, int row, PgFdwAnalyzeState *astate)
* analyze.c; see Jeff Vitter's paper.
*/
if (astate->rowstoskip < 0)
astate->rowstoskip = anl_get_next_S(astate->samplerows, targrows,
&astate->rstate);
astate->rowstoskip = reservoir_get_next_S(&astate->rstate, astate->samplerows, targrows);
if (astate->rowstoskip <= 0)
{
/* Choose a random reservoir element to replace. */
pos = (int) (targrows * anl_random_fract());
pos = (int) (targrows * sampler_random_fract());
Assert(pos >= 0 && pos < targrows);
heap_freetuple(astate->rows[pos]);
}