1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-24 01:29:19 +03:00

Add general purpose hasing functions to pgbench.

Hashing function is useful for simulating real-world workload in test like
WEB workload, as an example - YCSB benchmarks.

Author: Ildar Musin with minor editorization by me
Reviewed by: Fabien Coelho, me
Discussion: https://www.postgresql.org/message-id/flat/0e8bd39e-dfcd-2879-f88f-272799ad7ef2@postgrespro.ru
This commit is contained in:
Teodor Sigaev
2018-03-21 18:01:23 +03:00
parent 8bb3c7d347
commit e51a04840a
5 changed files with 237 additions and 32 deletions

View File

@@ -16,6 +16,10 @@
#include "pgbench.h"
#define PGBENCH_NARGS_VARIABLE (-1)
#define PGBENCH_NARGS_CASE (-2)
#define PGBENCH_NARGS_HASH (-3)
PgBenchExpr *expr_parse_result;
static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list);
@@ -226,9 +230,13 @@ make_uop(yyscan_t yyscanner, const char *operator, PgBenchExpr *expr)
/*
* List of available functions:
* - fname: function name, "!..." for special internal functions
* - nargs: number of arguments
* -1 is a special value for least & greatest meaning #args >= 1
* -2 is for the "CASE WHEN ..." function, which has #args >= 3 and odd
* - nargs: number of arguments. Special cases:
* - PGBENCH_NARGS_VARIABLE is a special value for least & greatest
* meaning #args >= 1;
* - PGBENCH_NARGS_CASE is for the "CASE WHEN ..." function, which
* has #args >= 3 and odd;
* - PGBENCH_NARGS_HASH is for hash functions, which have one required
* and one optional argument;
* - tag: function identifier from PgBenchFunction enum
*/
static const struct
@@ -259,10 +267,10 @@ static const struct
"abs", 1, PGBENCH_ABS
},
{
"least", -1, PGBENCH_LEAST
"least", PGBENCH_NARGS_VARIABLE, PGBENCH_LEAST
},
{
"greatest", -1, PGBENCH_GREATEST
"greatest", PGBENCH_NARGS_VARIABLE, PGBENCH_GREATEST
},
{
"debug", 1, PGBENCH_DEBUG
@@ -347,7 +355,25 @@ static const struct
},
/* "case when ... then ... else ... end" construction */
{
"!case_end", -2, PGBENCH_CASE
"!case_end", PGBENCH_NARGS_CASE, PGBENCH_CASE
},
{
"hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
},
{
"hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
},
{
"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
},
{
"hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
},
{
"hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
},
{
"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
},
/* keep as last array element */
{
@@ -423,29 +449,51 @@ elist_length(PgBenchExprList *list)
static PgBenchExpr *
make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args)
{
int len = elist_length(args);
PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
Assert(fnumber >= 0);
if (PGBENCH_FUNCTIONS[fnumber].nargs >= 0 &&
PGBENCH_FUNCTIONS[fnumber].nargs != elist_length(args))
expr_yyerror_more(yyscanner, "unexpected number of arguments",
PGBENCH_FUNCTIONS[fnumber].fname);
/* check at least one arg for least & greatest */
if (PGBENCH_FUNCTIONS[fnumber].nargs == -1 &&
elist_length(args) == 0)
expr_yyerror_more(yyscanner, "at least one argument expected",
PGBENCH_FUNCTIONS[fnumber].fname);
/* special case: case (when ... then ...)+ (else ...)? end */
if (PGBENCH_FUNCTIONS[fnumber].nargs == -2)
/* validate arguments number including few special cases */
switch (PGBENCH_FUNCTIONS[fnumber].nargs)
{
int len = elist_length(args);
/* check at least one arg for least & greatest */
case PGBENCH_NARGS_VARIABLE:
if (len == 0)
expr_yyerror_more(yyscanner, "at least one argument expected",
PGBENCH_FUNCTIONS[fnumber].fname);
break;
/* 'else' branch is always present, but could be a NULL-constant */
if (len < 3 || len % 2 != 1)
expr_yyerror_more(yyscanner, "odd and >= 3 number of arguments expected",
"case control structure");
/* case (when ... then ...)+ (else ...)? end */
case PGBENCH_NARGS_CASE:
/* 'else' branch is always present, but could be a NULL-constant */
if (len < 3 || len % 2 != 1)
expr_yyerror_more(yyscanner,
"odd and >= 3 number of arguments expected",
"case control structure");
break;
/* hash functions with optional seed argument */
case PGBENCH_NARGS_HASH:
if (len > 2)
expr_yyerror_more(yyscanner, "unexpected number of arguments",
PGBENCH_FUNCTIONS[fnumber].fname);
if (len == 1)
{
PgBenchExpr *var = make_variable("default_seed");
args = make_elist(var, args);
}
break;
/* common case: positive arguments number */
default:
Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0);
if (PGBENCH_FUNCTIONS[fnumber].nargs != len)
expr_yyerror_more(yyscanner, "unexpected number of arguments",
PGBENCH_FUNCTIONS[fnumber].fname);
}
expr->etype = ENODE_FUNCTION;

View File

@@ -60,6 +60,14 @@
#define ERRCODE_UNDEFINED_TABLE "42P01"
/*
* Hashing constants
*/
#define FNV_PRIME 0x100000001b3
#define FNV_OFFSET_BASIS 0xcbf29ce484222325
#define MM2_MUL 0xc6a4a7935bd1e995
#define MM2_ROT 47
/*
* Multi-platform pthread implementations
*/
@@ -915,6 +923,54 @@ getZipfianRand(TState *thread, int64 min, int64 max, double s)
: computeHarmonicZipfian(thread, n, s));
}
/*
* FNV-1a hash function
*/
static int64
getHashFnv1a(int64 val, uint64 seed)
{
int64 result;
int i;
result = FNV_OFFSET_BASIS ^ seed;
for (i = 0; i < 8; ++i)
{
int32 octet = val & 0xff;
val = val >> 8;
result = result ^ octet;
result = result * FNV_PRIME;
}
return result;
}
/*
* Murmur2 hash function
*
* Based on original work of Austin Appleby
* https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
*/
static int64
getHashMurmur2(int64 val, uint64 seed)
{
uint64 result = seed ^ (sizeof(int64) * MM2_MUL);
uint64 k = (uint64) val;
k *= MM2_MUL;
k ^= k >> MM2_ROT;
k *= MM2_MUL;
result ^= k;
result *= MM2_MUL;
result ^= result >> MM2_ROT;
result *= MM2_MUL;
result ^= result >> MM2_ROT;
return (int64) result;
}
/*
* Initialize the given SimpleStats struct to all zeroes
*/
@@ -2211,6 +2267,30 @@ evalStandardFunc(TState *thread, CState *st,
return true;
}
/* hashing */
case PGBENCH_HASH_FNV1A:
case PGBENCH_HASH_MURMUR2:
{
int64 val,
seed;
Assert(nargs == 2);
if (!coerceToInt(&vargs[0], &val) ||
!coerceToInt(&vargs[1], &seed))
return false;
if (func == PGBENCH_HASH_MURMUR2)
setIntValue(retval, getHashMurmur2(val, seed));
else if (func == PGBENCH_HASH_FNV1A)
setIntValue(retval, getHashFnv1a(val, seed));
else
/* cannot get here */
Assert(0);
return true;
}
default:
/* cannot get here */
Assert(0);
@@ -4963,6 +5043,10 @@ main(int argc, char **argv)
exit(1);
}
/* set random seed */
INSTR_TIME_SET_CURRENT(start_time);
srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
if (internal_script_used)
{
/*
@@ -5024,6 +5108,19 @@ main(int argc, char **argv)
}
}
/* set default seed for hash functions */
if (lookupVariable(&state[0], "default_seed") == NULL)
{
uint64 seed = ((uint64) (random() & 0xFFFF) << 48) |
((uint64) (random() & 0xFFFF) << 32) |
((uint64) (random() & 0xFFFF) << 16) |
(uint64) (random() & 0xFFFF);
for (i = 0; i < nclients; i++)
if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed))
exit(1);
}
if (!is_no_vacuum)
{
fprintf(stderr, "starting vacuum...");
@@ -5041,10 +5138,6 @@ main(int argc, char **argv)
}
PQfinish(con);
/* set random seed */
INSTR_TIME_SET_CURRENT(start_time);
srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
/* set up thread data structures */
threads = (TState *) pg_malloc(sizeof(TState) * nthreads);
nclients_dealt = 0;

View File

@@ -97,7 +97,9 @@ typedef enum PgBenchFunction
PGBENCH_LE,
PGBENCH_LT,
PGBENCH_IS,
PGBENCH_CASE
PGBENCH_CASE,
PGBENCH_HASH_FNV1A,
PGBENCH_HASH_MURMUR2
} PgBenchFunction;
typedef struct PgBenchExpr PgBenchExpr;

View File

@@ -259,6 +259,11 @@ pgbench(
qr{command=46.: int 46\b},
qr{command=47.: boolean true\b},
qr{command=48.: boolean true\b},
qr{command=49.: int -5817877081768721676\b},
qr{command=50.: boolean true\b},
qr{command=51.: int -7793829335365542153\b},
qr{command=52.: int -?\d+\b},
qr{command=53.: boolean true\b},
],
'pgbench expressions',
{ '001_pgbench_expressions' => q{-- integer functions
@@ -327,6 +332,12 @@ pgbench(
\set n6 debug(:n IS NULL AND NOT :f AND :t)
-- conditional truth
\set cs debug(CASE WHEN 1 THEN TRUE END AND CASE WHEN 1.0 THEN TRUE END AND CASE WHEN :n THEN NULL ELSE TRUE END)
-- hash functions
\set h0 debug(hash(10, 5432))
\set h1 debug(:h0 = hash_murmur2(10, 5432))
\set h3 debug(hash_fnv1a(10, 5432))
\set h4 debug(hash(10))
\set h5 debug(hash(10) = hash(10, :default_seed))
-- lazy evaluation
\set zy 0
\set yz debug(case when :zy = 0 then -1 else (1 / :zy) end)