diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3cbac..e3a0abb4c70 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -306,6 +306,31 @@ pgbench options d
+
+
+
+
+ Create a partitioned pgbench_accounts table with
+ NUM partitions of nearly equal size for
+ the scaled number of accounts.
+ Default is 0, meaning no partitioning.
+
+
+
+
+
+
+
+
+ Create a partitioned pgbench_accounts table with
+ NAME method.
+ Expected values are range or hash.
+ This option requires that is set to non-zero.
+ If unspecified, default is range.
+
+
+
+
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652bfbf6..e72ad0036e1 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,23 @@ int64 latency_limit = 0;
char *tablespace = NULL;
char *index_tablespace = NULL;
+/*
+ * Number of "pgbench_accounts" partitions. 0 is the default and means no
+ * partitioning.
+ */
+static int partitions = 0;
+
+/* partitioning strategy for "pgbench_accounts" */
+typedef enum
+{
+ PART_NONE, /* no partitioning */
+ PART_RANGE, /* range partitioning */
+ PART_HASH /* hash partitioning */
+} partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
+
/* random seed used to initialize base_random_sequence */
int64 random_seed = -1;
@@ -582,6 +599,7 @@ static void doLog(TState *thread, CState *st,
StatsData *agg, bool skipped, double latency, double lag);
static void processXactStats(TState *thread, CState *st, instr_time *now,
bool skipped, StatsData *agg);
+static void append_fillfactor(char *opts, int len);
static void addScript(ParsedScript script);
static void *threadRun(void *arg);
static void finishCon(CState *st);
@@ -617,6 +635,9 @@ usage(void)
" --foreign-keys create foreign key constraints between tables\n"
" --index-tablespace=TABLESPACE\n"
" create indexes in the specified tablespace\n"
+ " --partitions=NUM partition pgbench_accounts in NUM parts (default: 0)\n"
+ " --partition-method=(range|hash)\n"
+ " partition pgbench_accounts with this method (default: range)\n"
" --tablespace=TABLESPACE create tables in the specified tablespace\n"
" --unlogged-tables create tables as unlogged tables\n"
"\nOptions to select what to run:\n"
@@ -3601,6 +3622,77 @@ initDropTables(PGconn *con)
"pgbench_tellers");
}
+/*
+ * Create "pgbench_accounts" partitions if needed.
+ *
+ * This is the larger table of pgbench default tpc-b like schema
+ * with a known size, so we choose to partition it.
+ */
+static void
+createPartitions(PGconn *con)
+{
+ char ff[64];
+
+ ff[0] = '\0';
+
+ /*
+ * Per ddlinfo in initCreateTables, fillfactor is needed on table
+ * pgbench_accounts.
+ */
+ append_fillfactor(ff, sizeof(ff));
+
+ /* we must have to create some partitions */
+ Assert(partitions > 0);
+
+ fprintf(stderr, "creating %d partitions...\n", partitions);
+
+ for (int p = 1; p <= partitions; p++)
+ {
+ char query[256];
+
+ if (partition_method == PART_RANGE)
+ {
+ int64 part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+ char minvalue[32],
+ maxvalue[32];
+
+ /*
+ * For RANGE, we use open-ended partitions at the beginning and
+ * end to allow any valid value for the primary key. Although the
+ * actual minimum and maximum values can be derived from the
+ * scale, it is more generic and the performance is better.
+ */
+ if (p == 1)
+ sprintf(minvalue, "minvalue");
+ else
+ sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+ if (p < partitions)
+ sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+ else
+ sprintf(maxvalue, "maxvalue");
+
+ snprintf(query, sizeof(query),
+ "create%s table pgbench_accounts_%d\n"
+ " partition of pgbench_accounts\n"
+ " for values from (%s) to (%s)%s\n",
+ unlogged_tables ? " unlogged" : "", p,
+ minvalue, maxvalue, ff);
+ }
+ else if (partition_method == PART_HASH)
+ snprintf(query, sizeof(query),
+ "create%s table pgbench_accounts_%d\n"
+ " partition of pgbench_accounts\n"
+ " for values with (modulus %d, remainder %d)%s\n",
+ unlogged_tables ? " unlogged" : "", p,
+ partitions, p - 1, ff);
+ else /* cannot get there */
+ Assert(0);
+
+ executeStatement(con, query);
+ }
+}
+
/*
* Create pgbench's standard tables
*/
@@ -3664,9 +3756,15 @@ initCreateTables(PGconn *con)
/* Construct new create table statement. */
opts[0] = '\0';
- if (ddl->declare_fillfactor)
+
+ /* Partition pgbench_accounts table */
+ if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0)
snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
- " with (fillfactor=%d)", fillfactor);
+ " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+ else if (ddl->declare_fillfactor)
+ /* fillfactor is only expected on actual tables */
+ append_fillfactor(opts, sizeof(opts));
+
if (tablespace != NULL)
{
char *escape_tablespace;
@@ -3686,6 +3784,21 @@ initCreateTables(PGconn *con)
executeStatement(con, buffer);
}
+
+ if (partition_method != PART_NONE)
+ createPartitions(con);
+}
+
+/*
+ * add fillfactor percent option.
+ *
+ * XXX - As default is 100, it could be removed in this case.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+ snprintf(opts + strlen(opts), len - strlen(opts),
+ " with (fillfactor=%d)", fillfactor);
}
/*
@@ -4010,6 +4123,121 @@ runInitSteps(const char *initialize_steps)
termPQExpBuffer(&stats);
}
+/*
+ * Extract pgbench table informations into global variables scale,
+ * partition_method and partitions.
+ */
+static void
+GetTableInfo(PGconn *con, bool scale_given)
+{
+ PGresult *res;
+
+ /*
+ * get the scaling factor that should be same as count(*) from
+ * pgbench_branches if this is not a custom query
+ */
+ res = PQexec(con, "select count(*) from pgbench_branches");
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ {
+ char *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
+
+ fprintf(stderr, "%s", PQerrorMessage(con));
+ if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
+ {
+ fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+ }
+
+ exit(1);
+ }
+ scale = atoi(PQgetvalue(res, 0, 0));
+ if (scale < 0)
+ {
+ fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
+ PQgetvalue(res, 0, 0));
+ exit(1);
+ }
+ PQclear(res);
+
+ /* warn if we override user-given -s switch */
+ if (scale_given)
+ fprintf(stderr,
+ "scale option ignored, using count from pgbench_branches table (%d)\n",
+ scale);
+
+ /*
+ * Get the partition information for the first "pgbench_accounts" table
+ * found in search_path.
+ *
+ * The result is empty if no "pgbench_accounts" is found.
+ *
+ * Otherwise, it always returns one row even if the table is not
+ * partitioned (in which case the partition strategy is NULL).
+ *
+ * The number of partitions can be 0 even for partitioned tables, if no
+ * partition is attached.
+ *
+ * We assume no partitioning on any failure, so as to avoid failing on an
+ * old version without "pg_partitioned_table".
+ */
+ res = PQexec(con,
+ "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+ "from pg_catalog.pg_class as c "
+ "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+ "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+ "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+ "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+ "where c.relname = 'pgbench_accounts' and o.n is not null "
+ "group by 1, 2 "
+ "order by 1 asc "
+ "limit 1");
+
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ {
+ /* probably an older version, coldly assume no partitioning */
+ partition_method = PART_NONE;
+ partitions = 0;
+ }
+ else if (PQntuples(res) == 0)
+ {
+ /*
+ * This case is unlikely as pgbench already found "pgbench_branches"
+ * above to compute the scale.
+ */
+ fprintf(stderr,
+ "no pgbench_accounts table found in search_path\n"
+ "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\".\n", PQdb(con));
+ exit(1);
+ }
+ else /* PQntupes(res) == 1 */
+ {
+ /* normal case, extract partition information */
+ if (PQgetisnull(res, 0, 1))
+ partition_method = PART_NONE;
+ else
+ {
+ char *ps = PQgetvalue(res, 0, 1);
+
+ /* column must be there */
+ Assert(ps != NULL);
+
+ if (strcmp(ps, "r") == 0)
+ partition_method = PART_RANGE;
+ else if (strcmp(ps, "h") == 0)
+ partition_method = PART_HASH;
+ else
+ {
+ /* possibly a newer version with new partition method */
+ fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+ exit(1);
+ }
+ }
+
+ partitions = atoi(PQgetvalue(res, 0, 2));
+ }
+
+ PQclear(res);
+}
+
/*
* Replace :param with $n throughout the command's SQL text, which
* is a modifiable string in cmd->lines.
@@ -4919,6 +5147,10 @@ printResults(StatsData *total, instr_time total_time,
printf("transaction type: %s\n",
num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
printf("scaling factor: %d\n", scale);
+ /* only print partitioning information if some partitioning was detected */
+ if (partition_method != PART_NONE)
+ printf("partition method: %s\npartitions: %d\n",
+ PARTITION_METHOD[partition_method], partitions);
printf("query mode: %s\n", QUERYMODE[querymode]);
printf("number of clients: %d\n", nclients);
printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5358,8 @@ main(int argc, char **argv)
{"foreign-keys", no_argument, NULL, 8},
{"random-seed", required_argument, NULL, 9},
{"show-script", required_argument, NULL, 10},
+ {"partitions", required_argument, NULL, 11},
+ {"partition-method", required_argument, NULL, 12},
{NULL, 0, NULL, 0}
};
@@ -5160,7 +5394,6 @@ main(int argc, char **argv)
#endif
PGconn *con;
- PGresult *res;
char *env;
int exit_code = 0;
@@ -5486,6 +5719,29 @@ main(int argc, char **argv)
exit(0);
}
break;
+ case 11: /* partitions */
+ initialization_option_set = true;
+ partitions = atoi(optarg);
+ if (partitions < 0)
+ {
+ fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+ optarg);
+ exit(1);
+ }
+ break;
+ case 12: /* partition-method */
+ initialization_option_set = true;
+ if (pg_strcasecmp(optarg, "range") == 0)
+ partition_method = PART_RANGE;
+ else if (pg_strcasecmp(optarg, "hash") == 0)
+ partition_method = PART_HASH;
+ else
+ {
+ fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+ " got: \"%s\"\n", optarg);
+ exit(1);
+ }
+ break;
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
@@ -5567,6 +5823,16 @@ main(int argc, char **argv)
exit(1);
}
+ if (partitions == 0 && partition_method != PART_NONE)
+ {
+ fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+ exit(1);
+ }
+
+ /* set default method */
+ if (partitions > 0 && partition_method == PART_NONE)
+ partition_method = PART_RANGE;
+
if (initialize_steps == NULL)
initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
@@ -5724,39 +5990,7 @@ main(int argc, char **argv)
}
if (internal_script_used)
- {
- /*
- * get the scaling factor that should be same as count(*) from
- * pgbench_branches if this is not a custom query
- */
- res = PQexec(con, "select count(*) from pgbench_branches");
- if (PQresultStatus(res) != PGRES_TUPLES_OK)
- {
- char *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
-
- fprintf(stderr, "%s", PQerrorMessage(con));
- if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
- {
- fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
- }
-
- exit(1);
- }
- scale = atoi(PQgetvalue(res, 0, 0));
- if (scale < 0)
- {
- fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
- PQgetvalue(res, 0, 0));
- exit(1);
- }
- PQclear(res);
-
- /* warn if we override user-given -s switch */
- if (scale_given)
- fprintf(stderr,
- "scale option ignored, using count from pgbench_branches table (%d)\n",
- scale);
- }
+ GetTableInfo(con, scale_given);
/*
* :scale variables normally get -s or database scale, but don't override
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4f..c441626d7cd 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,19 @@ sub pgbench
return;
}
+# tablespace for testing, because partitioned tables cannot use pg_default
+# explicitly and we want to test that table creation with tablespace works
+# for partitioned tables.
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+# this takes care of WIN-specific path issues
+my $ets = TestLib::perl2host($ts);
+
+# the next commands will issue a syntax error if the path contains a "'"
+$node->safe_psql('postgres',
+ "CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
# Test concurrent OID generation via pg_enum_oid_index. This indirectly
# exercises LWLock and spinlock concurrency.
my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +113,13 @@ pgbench(
# Again, with all possible options
pgbench(
- '--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+ '--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
0,
[qr{^$}i],
[
qr{dropping old tables},
qr{creating tables},
+ qr{creating 2 partitions},
qr{vacuuming},
qr{creating primary keys},
qr{creating foreign keys},
@@ -116,12 +130,13 @@ pgbench(
# Test interaction of --init-steps with legacy step-selection options
pgbench(
- '--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+ '--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
0,
[qr{^$}],
[
qr{dropping old tables},
qr{creating tables},
+ qr{creating 3 partitions},
qr{creating primary keys},
qr{.* of .* tuples \(.*\) done},
qr{creating foreign keys},
@@ -910,5 +925,6 @@ check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
qr{^\d \d{1,2} \d+ \d \d+ \d+$});
# done
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
$node->stop;
done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa18418b4..1e9542af3f2 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
qr{error while setting random seed from --random-seed option}
]
],
+ [ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+ [ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+ [
+ 'partition method without partitioning',
+ '-i --partition-method=hash',
+ [ qr{partition-method requires greater than zero --partitions} ]
+ ],
# logging sub-options
[