diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index f11d36620d6..ee2501be552 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -1543,29 +1543,17 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / middle quarter (1.0 / 4.0) of the interval (i.e. from <literal>3.0 / 8.0</literal> to <literal>5.0 / 8.0</literal>) and 95% from the middle half (<literal>2.0 / 4.0</literal>) of the interval (second and third - quartiles). The minimum <replaceable>parameter</replaceable> is 2.0 for performance - of the Box-Muller transform. + quartiles). The minimum allowed <replaceable>parameter</replaceable> + value is 2.0. </para> </listitem> <listitem> <para> - <literal>random_zipfian</literal> generates an approximated bounded Zipfian - distribution. For <replaceable>parameter</replaceable> in (0, 1), an - approximated algorithm is taken from - "Quickly Generating Billion-Record Synthetic Databases", - Jim Gray et al, SIGMOD 1994. For <replaceable>parameter</replaceable> - in (1, 1000), a rejection method is used, based on - "Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551, - Springer 1986. The distribution is not defined when the parameter's - value is 1.0. The function's performance is poor for parameter values - close and above 1.0 and on a small range. - </para> - <para> + <literal>random_zipfian</literal> generates a bounded Zipfian + distribution. <replaceable>parameter</replaceable> defines how skewed the distribution is. The larger the <replaceable>parameter</replaceable>, the more frequently values closer to the beginning of the interval are drawn. - The closer to 0 <replaceable>parameter</replaceable> is, - the flatter (more uniform) the output distribution. The distribution is such that, assuming the range starts from 1, the ratio of the probability of drawing <replaceable>k</replaceable> versus drawing <replaceable>k+1</replaceable> is @@ -1576,6 +1564,13 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / itself is produced <literal>(3/2)*2.5 = 2.76</literal> times more frequently than <literal>3</literal>, and so on. </para> + <para> + <application>pgbench</application>'s implementation is based on + "Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551, + Springer 1986. Due to limitations of that algorithm, + the <replaceable>parameter</replaceable> value is restricted to + the range [1.001, 1000]. + </para> </listitem> </itemizedlist> diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 93e2d350e73..e5078af7961 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -135,10 +135,10 @@ static int pthread_join(pthread_t th, void **thread_return); #define LOG_STEP_SECONDS 5 /* seconds between log messages */ #define DEFAULT_NXACTS 10 /* default nxacts */ -#define ZIPF_CACHE_SIZE 15 /* cache cells number */ - #define MIN_GAUSSIAN_PARAM 2.0 /* minimum parameter for gauss */ -#define MAX_ZIPFIAN_PARAM 1000 /* maximum parameter for zipfian */ + +#define MIN_ZIPFIAN_PARAM 1.001 /* minimum parameter for zipfian */ +#define MAX_ZIPFIAN_PARAM 1000.0 /* maximum parameter for zipfian */ int nxacts = 0; /* number of transactions per client */ int duration = 0; /* duration in seconds */ @@ -410,35 +410,6 @@ typedef struct int ecnt; /* error count */ } CState; -/* - * Cache cell for random_zipfian call - */ -typedef struct -{ - /* cell keys */ - double s; /* s - parameter of random_zipfian function */ - int64 n; /* number of elements in range (max - min + 1) */ - - double harmonicn; /* generalizedHarmonicNumber(n, s) */ - double alpha; - double beta; - double eta; - - uint64 last_used; /* last used logical time */ -} ZipfCell; - -/* - * Zipf cache for zeta values - */ -typedef struct -{ - uint64 current; /* counter for LRU cache replacement algorithm */ - - int nb_cells; /* number of filled cells */ - int overflowCount; /* number of cache overflows */ - ZipfCell cells[ZIPF_CACHE_SIZE]; -} ZipfCache; - /* * Thread state */ @@ -460,8 +431,6 @@ typedef struct int64 throttle_trigger; /* previous/next throttling (us) */ FILE *logfile; /* where to log, or NULL */ - ZipfCache zipf_cache; /* for thread-safe zipfian random number - * generation */ /* per thread collected stats */ instr_time start_time; /* thread start time */ @@ -975,77 +944,12 @@ getPoissonRand(RandomState *random_state, double center) return (int64) (-log(uniform) * center + 0.5); } -/* helper function for getZipfianRand */ -static double -generalizedHarmonicNumber(int64 n, double s) -{ - int i; - double ans = 0.0; - - for (i = n; i > 1; i--) - ans += pow(i, -s); - return ans + 1.0; -} - -/* set harmonicn and other parameters to cache cell */ -static void -zipfSetCacheCell(ZipfCell *cell, int64 n, double s) -{ - double harmonic2; - - cell->n = n; - cell->s = s; - - harmonic2 = generalizedHarmonicNumber(2, s); - cell->harmonicn = generalizedHarmonicNumber(n, s); - - cell->alpha = 1.0 / (1.0 - s); - cell->beta = pow(0.5, s); - cell->eta = (1.0 - pow(2.0 / n, 1.0 - s)) / (1.0 - harmonic2 / cell->harmonicn); -} - -/* - * search for cache cell with keys (n, s) - * and create new cell if it does not exist - */ -static ZipfCell * -zipfFindOrCreateCacheCell(ZipfCache *cache, int64 n, double s) -{ - int i, - least_recently_used = 0; - ZipfCell *cell; - - /* search cached cell for given parameters */ - for (i = 0; i < cache->nb_cells; i++) - { - cell = &cache->cells[i]; - if (cell->n == n && cell->s == s) - return &cache->cells[i]; - - if (cell->last_used < cache->cells[least_recently_used].last_used) - least_recently_used = i; - } - - /* create new one if it does not exist */ - if (cache->nb_cells < ZIPF_CACHE_SIZE) - i = cache->nb_cells++; - else - { - /* replace LRU cell if cache is full */ - i = least_recently_used; - cache->overflowCount++; - } - - zipfSetCacheCell(&cache->cells[i], n, s); - - cache->cells[i].last_used = cache->current++; - return &cache->cells[i]; -} - /* * Computing zipfian using rejection method, based on * "Non-Uniform Random Variate Generation", * Luc Devroye, p. 550-551, Springer 1986. + * + * This works for s > 1.0, but may perform badly for s very close to 1.0. */ static int64 computeIterativeZipfian(RandomState *random_state, int64 n, double s) @@ -1056,6 +960,10 @@ computeIterativeZipfian(RandomState *random_state, int64 n, double s) u, v; + /* Ensure n is sane */ + if (n <= 1) + return 1; + while (true) { /* random variates */ @@ -1072,39 +980,16 @@ computeIterativeZipfian(RandomState *random_state, int64 n, double s) return (int64) x; } -/* - * Computing zipfian using harmonic numbers, based on algorithm described in - * "Quickly Generating Billion-Record Synthetic Databases", - * Jim Gray et al, SIGMOD 1994 - */ -static int64 -computeHarmonicZipfian(ZipfCache *zipf_cache, RandomState *random_state, - int64 n, double s) -{ - ZipfCell *cell = zipfFindOrCreateCacheCell(zipf_cache, n, s); - double uniform = pg_erand48(random_state->xseed); - double uz = uniform * cell->harmonicn; - - if (uz < 1.0) - return 1; - if (uz < 1.0 + cell->beta) - return 2; - return 1 + (int64) (cell->n * pow(cell->eta * uniform - cell->eta + 1.0, cell->alpha)); -} - /* random number generator: zipfian distribution from min to max inclusive */ static int64 -getZipfianRand(ZipfCache *zipf_cache, RandomState *random_state, int64 min, - int64 max, double s) +getZipfianRand(RandomState *random_state, int64 min, int64 max, double s) { int64 n = max - min + 1; /* abort if parameter is invalid */ - Assert(s > 0.0 && s != 1.0 && s <= MAX_ZIPFIAN_PARAM); + Assert(MIN_ZIPFIAN_PARAM <= s && s <= MAX_ZIPFIAN_PARAM); - return min - 1 + ((s > 1) - ? computeIterativeZipfian(random_state, n, s) - : computeHarmonicZipfian(zipf_cache, random_state, n, s)); + return min - 1 + computeIterativeZipfian(random_state, n, s); } /* @@ -2426,17 +2311,17 @@ evalStandardFunc(TState *thread, CState *st, } else if (func == PGBENCH_RANDOM_ZIPFIAN) { - if (param <= 0.0 || param == 1.0 || param > MAX_ZIPFIAN_PARAM) + if (param < MIN_ZIPFIAN_PARAM || param > MAX_ZIPFIAN_PARAM) { fprintf(stderr, - "zipfian parameter must be in range (0, 1) U (1, %d]" - " (got %f)\n", MAX_ZIPFIAN_PARAM, param); + "zipfian parameter must be in range [%.3f, %.0f]" + " (not %f)\n", + MIN_ZIPFIAN_PARAM, MAX_ZIPFIAN_PARAM, param); return false; } + setIntValue(retval, - getZipfianRand(&thread->zipf_cache, - &st->cs_func_rs, - imin, imax, param)); + getZipfianRand(&st->cs_func_rs, imin, imax, param)); } else /* exponential */ { @@ -2444,7 +2329,7 @@ evalStandardFunc(TState *thread, CState *st, { fprintf(stderr, "exponential parameter must be greater than zero" - " (got %f)\n", param); + " (not %f)\n", param); return false; } @@ -4996,8 +4881,6 @@ printResults(TState *threads, StatsData *total, instr_time total_time, tps_include, tps_exclude; int64 ntx = total->cnt - total->skipped; - int i, - totalCacheOverflows = 0; time_include = INSTR_TIME_GET_DOUBLE(total_time); @@ -5025,15 +4908,6 @@ printResults(TState *threads, StatsData *total, instr_time total_time, printf("number of transactions actually processed: " INT64_FORMAT "\n", ntx); } - /* Report zipfian cache overflow */ - for (i = 0; i < nthreads; i++) - { - totalCacheOverflows += threads[i].zipf_cache.overflowCount; - } - if (totalCacheOverflows > 0) - { - printf("zipfian cache array overflowed %d time(s)\n", totalCacheOverflows); - } /* Remaining stats are nonsensical if we failed to execute any xacts */ if (total->cnt <= 0) @@ -5916,9 +5790,6 @@ main(int argc, char **argv) initRandomState(&thread->ts_sample_rs); thread->logfile = NULL; /* filled in later */ thread->latency_late = 0; - thread->zipf_cache.nb_cells = 0; - thread->zipf_cache.current = 0; - thread->zipf_cache.overflowCount = 0; initStats(&thread->stats, 0); nclients_dealt += thread->nstate; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index ad3a7a79f87..62906d5e55d 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -666,13 +666,13 @@ SELECT LEAST(}.join(', ', (':i') x 256).q{)} [ 'set zipfian param to 1', 2, - [qr{zipfian parameter must be in range \(0, 1\) U \(1, \d+\]}], + [qr{zipfian parameter must be in range \[1\.001, 1000\]}], q{\set i random_zipfian(0, 10, 1)} ], [ 'set zipfian param too large', 2, - [qr{zipfian parameter must be in range \(0, 1\) U \(1, \d+\]}], + [qr{zipfian parameter must be in range \[1\.001, 1000\]}], q{\set i random_zipfian(0, 10, 1000000)} ], [ @@ -802,33 +802,6 @@ for my $e (@errors) { $n => $script }); } -# zipfian cache array overflow -pgbench( - '-t 1', 0, - [ qr{processed: 1/1}, qr{zipfian cache array overflowed 1 time\(s\)} ], - [qr{^}], - 'pgbench zipfian array overflow on random_zipfian', - { - '001_pgbench_random_zipfian' => q{ -\set i random_zipfian(1, 100, 0.5) -\set i random_zipfian(2, 100, 0.5) -\set i random_zipfian(3, 100, 0.5) -\set i random_zipfian(4, 100, 0.5) -\set i random_zipfian(5, 100, 0.5) -\set i random_zipfian(6, 100, 0.5) -\set i random_zipfian(7, 100, 0.5) -\set i random_zipfian(8, 100, 0.5) -\set i random_zipfian(9, 100, 0.5) -\set i random_zipfian(10, 100, 0.5) -\set i random_zipfian(11, 100, 0.5) -\set i random_zipfian(12, 100, 0.5) -\set i random_zipfian(13, 100, 0.5) -\set i random_zipfian(14, 100, 0.5) -\set i random_zipfian(15, 100, 0.5) -\set i random_zipfian(16, 100, 0.5) -} - }); - # throttling pgbench( '-t 100 -S --rate=100000 --latency-limit=1000000 -c 2 -n -r',