mirror of
https://github.com/postgres/postgres.git
synced 2025-10-27 00:12:01 +03:00
Get rid of O(N^2) script-parsing overhead in pgbench.
pgbench wants to record the starting line number of each command in its scripts. It was computing that by scanning from the script start and counting newlines, so that O(N^2) work had to be done for an N-command script. In a script with 50K lines, this adds up to about 10 seconds on my machine. To add insult to injury, the results were subtly wrong, because expr_scanner_offset() scanned to find the NUL that flex inserts at the end of the current token --- and before the first yylex call, no such NUL has been inserted. So we ended up computing the script's last line number, not its first one. This was visible only in case of \gset at the start of a script, which perhaps accounts for the lack of complaints. To fix, steal an idea from plpgsql and track the current lexer ending position and line count as we advance through the script. (It's a bit simpler than plpgsql since we never need to back up.) Also adjust a couple of other places that were invoking scans from script start when they didn't really need to. I made a new psqlscan function psql_scan_get_location() that replaces both expr_scanner_offset() and expr_scanner_get_lineno(), since in practice expr_scanner_get_lineno() was only being invoked to find the line number of the current lexer end position. Reported-by: Daniel Vérité <daniel@manitou-mail.org> Author: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://postgr.es/m/84a8a89e-adb8-47a9-9d34-c13f7150ee45@manitou-mail.org
This commit is contained in:
@@ -5690,8 +5690,8 @@ process_backslash_command(PsqlScanState sstate, const char *source)
|
||||
initPQExpBuffer(&word_buf);
|
||||
|
||||
/* Remember location of the backslash */
|
||||
start_offset = expr_scanner_offset(sstate) - 1;
|
||||
lineno = expr_scanner_get_lineno(sstate, start_offset);
|
||||
psql_scan_get_location(sstate, &lineno, &start_offset);
|
||||
start_offset--;
|
||||
|
||||
/* Collect first word of command */
|
||||
if (!expr_lex_one_word(sstate, &word_buf, &word_offset))
|
||||
@@ -5747,7 +5747,6 @@ process_backslash_command(PsqlScanState sstate, const char *source)
|
||||
my_command->first_line =
|
||||
expr_scanner_get_substring(sstate,
|
||||
start_offset,
|
||||
expr_scanner_offset(sstate),
|
||||
true);
|
||||
|
||||
expr_scanner_finish(yyscanner);
|
||||
@@ -5777,7 +5776,6 @@ process_backslash_command(PsqlScanState sstate, const char *source)
|
||||
my_command->first_line =
|
||||
expr_scanner_get_substring(sstate,
|
||||
start_offset,
|
||||
expr_scanner_offset(sstate),
|
||||
true);
|
||||
|
||||
if (my_command->meta == META_SLEEP)
|
||||
@@ -5952,8 +5950,6 @@ ParseScript(const char *script, const char *desc, int weight)
|
||||
PQExpBufferData line_buf;
|
||||
int alloc_num;
|
||||
int index;
|
||||
int lineno;
|
||||
int start_offset;
|
||||
|
||||
#define COMMANDS_ALLOC_NUM 128
|
||||
alloc_num = COMMANDS_ALLOC_NUM;
|
||||
@@ -5977,7 +5973,6 @@ ParseScript(const char *script, const char *desc, int weight)
|
||||
* stdstrings should be true, which is a bit riskier.
|
||||
*/
|
||||
psql_scan_setup(sstate, script, strlen(script), 0, true);
|
||||
start_offset = expr_scanner_offset(sstate) - 1;
|
||||
|
||||
initPQExpBuffer(&line_buf);
|
||||
|
||||
@@ -5985,12 +5980,15 @@ ParseScript(const char *script, const char *desc, int weight)
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int lineno;
|
||||
int start_offset;
|
||||
PsqlScanResult sr;
|
||||
promptStatus_t prompt;
|
||||
Command *command = NULL;
|
||||
|
||||
resetPQExpBuffer(&line_buf);
|
||||
lineno = expr_scanner_get_lineno(sstate, start_offset);
|
||||
|
||||
psql_scan_get_location(sstate, &lineno, &start_offset);
|
||||
|
||||
sr = psql_scan(sstate, &line_buf, &prompt);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user