mirror of
https://github.com/postgres/postgres.git
synced 2025-04-18 13:44:19 +03:00
contrib/tsm_system_time
This commit is contained in:
parent
4d40494b11
commit
56e121a508
4
contrib/tsm_system_time/.gitignore
vendored
Normal file
4
contrib/tsm_system_time/.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# Generated subdirectories
|
||||||
|
/log/
|
||||||
|
/results/
|
||||||
|
/tmp_check/
|
21
contrib/tsm_system_time/Makefile
Normal file
21
contrib/tsm_system_time/Makefile
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# contrib/tsm_system_time/Makefile

# Shared library built from the single C source file.
MODULE_big = tsm_system_time
OBJS = tsm_system_time.o $(WIN32RES)
PGFILEDESC = "tsm_system_time - SYSTEM TABLESAMPLE method which accepts time in milliseconds as a limit"

# Extension packaging.
EXTENSION = tsm_system_time
DATA = tsm_system_time--1.0.sql

# Regression test suite (sql/tsm_system_time.sql vs expected/tsm_system_time.out).
REGRESS = tsm_system_time

# Build either out-of-tree through PGXS or in-tree as part of contrib.
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/tsm_system_time
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
|
54
contrib/tsm_system_time/expected/tsm_system_time.out
Normal file
54
contrib/tsm_system_time/expected/tsm_system_time.out
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
CREATE EXTENSION tsm_system_time;
|
||||||
|
CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
|
||||||
|
INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
|
||||||
|
ANALYZE test_tablesample;
|
||||||
|
SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000);
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
31
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432);
|
||||||
|
id
|
||||||
|
----
|
||||||
|
7
|
||||||
|
14
|
||||||
|
21
|
||||||
|
28
|
||||||
|
4
|
||||||
|
11
|
||||||
|
18
|
||||||
|
25
|
||||||
|
1
|
||||||
|
8
|
||||||
|
15
|
||||||
|
22
|
||||||
|
29
|
||||||
|
5
|
||||||
|
12
|
||||||
|
19
|
||||||
|
26
|
||||||
|
2
|
||||||
|
9
|
||||||
|
16
|
||||||
|
23
|
||||||
|
30
|
||||||
|
6
|
||||||
|
13
|
||||||
|
20
|
||||||
|
27
|
||||||
|
3
|
||||||
|
10
|
||||||
|
17
|
||||||
|
24
|
||||||
|
0
|
||||||
|
(31 rows)
|
||||||
|
|
||||||
|
EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10);
|
||||||
|
QUERY PLAN
|
||||||
|
------------------------------------------------------------------------------------
|
||||||
|
Sample Scan (system_time) on test_tablesample (cost=0.00..100.25 rows=25 width=4)
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- done
|
||||||
|
DROP TABLE test_tablesample CASCADE;
|
14
contrib/tsm_system_time/sql/tsm_system_time.sql
Normal file
14
contrib/tsm_system_time/sql/tsm_system_time.sql
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
CREATE EXTENSION tsm_system_time;
|
||||||
|
|
||||||
|
CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
|
||||||
|
|
||||||
|
INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
|
||||||
|
ANALYZE test_tablesample;
|
||||||
|
|
||||||
|
SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000);
|
||||||
|
SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432);
|
||||||
|
|
||||||
|
EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10);
|
||||||
|
|
||||||
|
-- done
|
||||||
|
DROP TABLE test_tablesample CASCADE;
|
40
contrib/tsm_system_time/tsm_system_time--1.0.sql
Normal file
40
contrib/tsm_system_time/tsm_system_time--1.0.sql
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
/* contrib/tsm_system_time/tsm_system_time--1.0.sql */
|
||||||
|
|
||||||
|
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||||
|
\echo Use "CREATE EXTENSION tsm_system_time" to load this file. \quit
|
||||||
|
|
||||||
|
CREATE FUNCTION tsm_system_time_init(internal, int4, int4)
|
||||||
|
RETURNS void
|
||||||
|
AS 'MODULE_PATHNAME'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION tsm_system_time_nextblock(internal)
|
||||||
|
RETURNS int4
|
||||||
|
AS 'MODULE_PATHNAME'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION tsm_system_time_nexttuple(internal, int4, int2)
|
||||||
|
RETURNS int2
|
||||||
|
AS 'MODULE_PATHNAME'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION tsm_system_time_end(internal)
|
||||||
|
RETURNS void
|
||||||
|
AS 'MODULE_PATHNAME'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION tsm_system_time_reset(internal)
|
||||||
|
RETURNS void
|
||||||
|
AS 'MODULE_PATHNAME'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION tsm_system_time_cost(internal, internal, internal, internal, internal, internal, internal)
|
||||||
|
RETURNS void
|
||||||
|
AS 'MODULE_PATHNAME'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
INSERT INTO pg_tablesample_method VALUES('system_time', false, true,
|
||||||
|
'tsm_system_time_init', 'tsm_system_time_nextblock',
|
||||||
|
'tsm_system_time_nexttuple', '-', 'tsm_system_time_end',
|
||||||
|
'tsm_system_time_reset', 'tsm_system_time_cost');
|
||||||
|
|
315
contrib/tsm_system_time/tsm_system_time.c
Normal file
315
contrib/tsm_system_time/tsm_system_time.c
Normal file
@ -0,0 +1,315 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* tsm_system_time.c
|
||||||
|
* interface routines for system_time tablesample method
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* IDENTIFICATION
|
||||||
|
* contrib/tsm_system_time/tsm_system_time.c
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "fmgr.h"
|
||||||
|
|
||||||
|
#include "access/tablesample.h"
|
||||||
|
#include "access/relscan.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
|
#include "nodes/execnodes.h"
|
||||||
|
#include "nodes/relation.h"
|
||||||
|
#include "optimizer/clauses.h"
|
||||||
|
#include "storage/bufmgr.h"
|
||||||
|
#include "utils/sampling.h"
|
||||||
|
#include "utils/spccache.h"
|
||||||
|
#include "utils/timestamp.h"
|
||||||
|
|
||||||
|
PG_MODULE_MAGIC;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* State
|
||||||
|
*/
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
SamplerRandomState randstate;
|
||||||
|
uint32 seed; /* random seed */
|
||||||
|
BlockNumber nblocks; /* number of block in relation */
|
||||||
|
int32 time; /* time limit for sampling */
|
||||||
|
TimestampTz start_time; /* start time of sampling */
|
||||||
|
TimestampTz end_time; /* end time of sampling */
|
||||||
|
OffsetNumber lt; /* last tuple returned from current block */
|
||||||
|
BlockNumber step; /* step size */
|
||||||
|
BlockNumber lb; /* last block visited */
|
||||||
|
BlockNumber estblocks; /* estimated number of returned blocks (moving) */
|
||||||
|
BlockNumber doneblocks; /* number of already returned blocks */
|
||||||
|
} SystemSamplerData;
|
||||||
|
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(tsm_system_time_init);
|
||||||
|
PG_FUNCTION_INFO_V1(tsm_system_time_nextblock);
|
||||||
|
PG_FUNCTION_INFO_V1(tsm_system_time_nexttuple);
|
||||||
|
PG_FUNCTION_INFO_V1(tsm_system_time_end);
|
||||||
|
PG_FUNCTION_INFO_V1(tsm_system_time_reset);
|
||||||
|
PG_FUNCTION_INFO_V1(tsm_system_time_cost);
|
||||||
|
|
||||||
|
static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initializes the state.
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
tsm_system_time_init(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
|
||||||
|
uint32 seed = PG_GETARG_UINT32(1);
|
||||||
|
int32 time = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2);
|
||||||
|
HeapScanDesc scan = tsdesc->heapScan;
|
||||||
|
SystemSamplerData *sampler;
|
||||||
|
|
||||||
|
if (time < 1)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
||||||
|
errmsg("invalid time limit"),
|
||||||
|
errhint("Time limit must be positive integer value.")));
|
||||||
|
|
||||||
|
sampler = palloc0(sizeof(SystemSamplerData));
|
||||||
|
|
||||||
|
/* Remember initial values for reinit */
|
||||||
|
sampler->seed = seed;
|
||||||
|
sampler->nblocks = scan->rs_nblocks;
|
||||||
|
sampler->lt = InvalidOffsetNumber;
|
||||||
|
sampler->estblocks = 2;
|
||||||
|
sampler->doneblocks = 0;
|
||||||
|
sampler->time = time;
|
||||||
|
sampler->start_time = GetCurrentTimestamp();
|
||||||
|
sampler->end_time = TimestampTzPlusMilliseconds(sampler->start_time,
|
||||||
|
sampler->time);
|
||||||
|
|
||||||
|
sampler_random_init_state(sampler->seed, sampler->randstate);
|
||||||
|
|
||||||
|
/* Find relative prime as step size for linear probing. */
|
||||||
|
sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
|
||||||
|
/*
|
||||||
|
* Randomize start position so that blocks close to step size don't have
|
||||||
|
* higher probability of being chosen on very short scan.
|
||||||
|
*/
|
||||||
|
sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
|
||||||
|
|
||||||
|
tsdesc->tsmdata = (void *) sampler;
|
||||||
|
|
||||||
|
PG_RETURN_VOID();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get next block number or InvalidBlockNumber when we're done.
|
||||||
|
*
|
||||||
|
* Uses linear probing algorithm for picking next block.
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
tsm_system_time_nextblock(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
|
||||||
|
SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
|
||||||
|
|
||||||
|
sampler->lb = (sampler->lb + sampler->step) % sampler->nblocks;
|
||||||
|
sampler->doneblocks++;
|
||||||
|
|
||||||
|
/* All blocks have been read, we're done */
|
||||||
|
if (sampler->doneblocks > sampler->nblocks)
|
||||||
|
PG_RETURN_UINT32(InvalidBlockNumber);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Update the estimations for time limit at least 10 times per estimated
|
||||||
|
* number of returned blocks to handle variations in block read speed.
|
||||||
|
*/
|
||||||
|
if (sampler->doneblocks % Max(sampler->estblocks/10, 1) == 0)
|
||||||
|
{
|
||||||
|
TimestampTz now = GetCurrentTimestamp();
|
||||||
|
long secs;
|
||||||
|
int usecs;
|
||||||
|
int usecs_remaining;
|
||||||
|
int time_per_block;
|
||||||
|
|
||||||
|
TimestampDifference(sampler->start_time, now, &secs, &usecs);
|
||||||
|
usecs += (int) secs * 1000000;
|
||||||
|
|
||||||
|
time_per_block = usecs / sampler->doneblocks;
|
||||||
|
|
||||||
|
/* No time left, end. */
|
||||||
|
TimestampDifference(now, sampler->end_time, &secs, &usecs);
|
||||||
|
if (secs <= 0 && usecs <= 0)
|
||||||
|
PG_RETURN_UINT32(InvalidBlockNumber);
|
||||||
|
|
||||||
|
/* Remaining microseconds */
|
||||||
|
usecs_remaining = usecs + (int) secs * 1000000;
|
||||||
|
|
||||||
|
/* Recalculate estimated returned number of blocks */
|
||||||
|
if (time_per_block < usecs_remaining && time_per_block > 0)
|
||||||
|
sampler->estblocks = sampler->time * time_per_block;
|
||||||
|
}
|
||||||
|
|
||||||
|
PG_RETURN_UINT32(sampler->lb);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get next tuple offset in current block or InvalidOffsetNumber if we are done
|
||||||
|
* with this block.
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
tsm_system_time_nexttuple(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
|
||||||
|
OffsetNumber maxoffset = PG_GETARG_UINT16(2);
|
||||||
|
SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
|
||||||
|
OffsetNumber tupoffset = sampler->lt;
|
||||||
|
|
||||||
|
if (tupoffset == InvalidOffsetNumber)
|
||||||
|
tupoffset = FirstOffsetNumber;
|
||||||
|
else
|
||||||
|
tupoffset++;
|
||||||
|
|
||||||
|
if (tupoffset > maxoffset)
|
||||||
|
tupoffset = InvalidOffsetNumber;
|
||||||
|
|
||||||
|
sampler->lt = tupoffset;
|
||||||
|
|
||||||
|
PG_RETURN_UINT16(tupoffset);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Cleanup method.
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
tsm_system_time_end(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
|
||||||
|
|
||||||
|
pfree(tsdesc->tsmdata);
|
||||||
|
|
||||||
|
PG_RETURN_VOID();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reset state (called by ReScan).
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
tsm_system_time_reset(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
|
||||||
|
SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
|
||||||
|
|
||||||
|
sampler->lt = InvalidOffsetNumber;
|
||||||
|
sampler->start_time = GetCurrentTimestamp();
|
||||||
|
sampler->end_time = TimestampTzPlusMilliseconds(sampler->start_time,
|
||||||
|
sampler->time);
|
||||||
|
sampler->estblocks = 2;
|
||||||
|
sampler->doneblocks = 0;
|
||||||
|
|
||||||
|
sampler_random_init_state(sampler->seed, sampler->randstate);
|
||||||
|
sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
|
||||||
|
sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
|
||||||
|
|
||||||
|
PG_RETURN_VOID();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Costing function.
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
tsm_system_time_cost(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
|
||||||
|
Path *path = (Path *) PG_GETARG_POINTER(1);
|
||||||
|
RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
|
||||||
|
List *args = (List *) PG_GETARG_POINTER(3);
|
||||||
|
BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
|
||||||
|
double *tuples = (double *) PG_GETARG_POINTER(5);
|
||||||
|
Node *limitnode;
|
||||||
|
int32 time;
|
||||||
|
BlockNumber relpages;
|
||||||
|
double reltuples;
|
||||||
|
double density;
|
||||||
|
double spc_random_page_cost;
|
||||||
|
|
||||||
|
limitnode = linitial(args);
|
||||||
|
limitnode = estimate_expression_value(root, limitnode);
|
||||||
|
|
||||||
|
if (IsA(limitnode, RelabelType))
|
||||||
|
limitnode = (Node *) ((RelabelType *) limitnode)->arg;
|
||||||
|
|
||||||
|
if (IsA(limitnode, Const))
|
||||||
|
time = DatumGetInt32(((Const *) limitnode)->constvalue);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Default time (1s) if the estimation didn't return Const. */
|
||||||
|
time = 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
relpages = baserel->pages;
|
||||||
|
reltuples = baserel->tuples;
|
||||||
|
|
||||||
|
/* estimate the tuple density */
|
||||||
|
if (relpages > 0)
|
||||||
|
density = reltuples / (double) relpages;
|
||||||
|
else
|
||||||
|
density = (BLCKSZ - SizeOfPageHeaderData) / baserel->width;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We equal random page cost value to number of ms it takes to read the
|
||||||
|
* random page here which is far from accurate but we don't have anything
|
||||||
|
* better to base our predicted page reads.
|
||||||
|
*/
|
||||||
|
get_tablespace_page_costs(baserel->reltablespace,
|
||||||
|
&spc_random_page_cost,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Assumption here is that we'll never read less then 1% of table pages,
|
||||||
|
* this is here mainly because it is much less bad to overestimate than
|
||||||
|
* underestimate and using just spc_random_page_cost will probably lead
|
||||||
|
* to underestimations in general.
|
||||||
|
*/
|
||||||
|
*pages = Min(baserel->pages, Max(time/spc_random_page_cost, baserel->pages/100));
|
||||||
|
*tuples = rint(density * (double) *pages * path->rows / baserel->tuples);
|
||||||
|
path->rows = *tuples;
|
||||||
|
|
||||||
|
PG_RETURN_VOID();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Greatest common divisor of a and b, computed with Euclid's algorithm.
 * Returns the other argument when one of them is zero (so gcd(0, 0) is 0).
 */
static uint32
gcd(uint32 a, uint32 b)
{
	for (;;)
	{
		uint32		rem;

		if (a == 0)
			return b;

		rem = b % a;
		b = a;
		a = rem;
	}
}
|
||||||
|
|
||||||
|
static uint32
|
||||||
|
random_relative_prime(uint32 n, SamplerRandomState randstate)
|
||||||
|
{
|
||||||
|
/* Pick random starting number, with some limits on what it can be. */
|
||||||
|
uint32 r = (uint32) sampler_random_fract(randstate) * n/2 + n/4,
|
||||||
|
t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This should only take 2 or 3 iterations as the probability of 2 numbers
|
||||||
|
* being relatively prime is ~61%.
|
||||||
|
*/
|
||||||
|
while ((t = gcd(r, n)) > 1)
|
||||||
|
{
|
||||||
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
r /= t;
|
||||||
|
}
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
5
contrib/tsm_system_time/tsm_system_time.control
Normal file
5
contrib/tsm_system_time/tsm_system_time.control
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# tsm_system_time extension
|
||||||
|
comment = 'SYSTEM TABLESAMPLE method which accepts time in milliseconds as a limit'
|
||||||
|
default_version = '1.0'
|
||||||
|
module_pathname = '$libdir/tsm_system_time'
|
||||||
|
relocatable = true
|
Loading…
x
Reference in New Issue
Block a user