1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-05 07:41:25 +03:00

In psql \copy from, send data to server in larger chunks.

Previously, we would send each line as a separate CopyData message.
That's pretty wasteful if the table is narrow, as each CopyData message
has 5 bytes of overhead. For efficiency, buffer up and pack 8 kB of
input data into each CopyData message.

The server also sends each line as a separate CopyData message in COPY TO
STDOUT, and that's similarly wasteful. But that's documented in the FE/BE
protocol description, so changing that would be a wire protocol break.

Reviewed-by: Aleksander Alekseev
Discussion: https://www.postgresql.org/message-id/40b2cec0-d0fb-3191-2ae1-9a3fe16a7e48%40iki.fi
This commit is contained in:
Heikki Linnakangas
2021-07-14 13:08:28 +03:00
parent b4deefc39b
commit eec57115e4

View File

@@ -581,13 +581,21 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
else else
{ {
bool copydone = false; bool copydone = false;
int buflen;
bool at_line_begin = true;
/*
* In text mode, we have to read the input one line at a time, so that
* we can stop reading at the EOF marker (\.). We mustn't read beyond
* the EOF marker, because if the data was inlined in a SQL script, we
* would eat up the commands after the EOF marker.
*/
buflen = 0;
while (!copydone) while (!copydone)
{ /* for each input line ... */ {
bool firstload; char *fgresult;
bool linedone;
if (showprompt) if (at_line_begin && showprompt)
{ {
const char *prompt = get_prompt(PROMPT_COPY, NULL); const char *prompt = get_prompt(PROMPT_COPY, NULL);
@@ -595,56 +603,37 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
fflush(stdout); fflush(stdout);
} }
firstload = true;
linedone = false;
while (!linedone)
{ /* for each bufferload in line ... */
int linelen;
char *fgresult;
/* enable longjmp while waiting for input */ /* enable longjmp while waiting for input */
sigint_interrupt_enabled = true; sigint_interrupt_enabled = true;
fgresult = fgets(buf, sizeof(buf), copystream); fgresult = fgets(&buf[buflen], COPYBUFSIZ - buflen, copystream);
sigint_interrupt_enabled = false; sigint_interrupt_enabled = false;
if (!fgresult) if (!fgresult)
{
copydone = true; copydone = true;
break; else
} {
int linelen;
linelen = strlen(buf); linelen = strlen(fgresult);
buflen += linelen;
/* current line is done? */ /* current line is done? */
if (linelen > 0 && buf[linelen - 1] == '\n') if (buf[buflen - 1] == '\n')
linedone = true; {
/* check for EOF marker, but not on a partial line */ /* check for EOF marker, but not on a partial line */
if (firstload) if (at_line_begin)
{ {
/* /*
* This code erroneously assumes '\.' on a line alone * This code erroneously assumes '\.' on a line alone
* inside a quoted CSV string terminates the \copy. * inside a quoted CSV string terminates the \copy.
* https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org * https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org
*/ */
if (strcmp(buf, "\\.\n") == 0 || if ((linelen == 3 && memcmp(fgresult, "\\.\n", 3) == 0) ||
strcmp(buf, "\\.\r\n") == 0) (linelen == 4 && memcmp(fgresult, "\\.\r\n", 4) == 0))
{ {
copydone = true; copydone = true;
break;
}
firstload = false;
}
if (PQputCopyData(conn, buf, linelen) <= 0)
{
OK = false;
copydone = true;
break;
} }
} }
@@ -653,6 +642,30 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
pset.lineno++; pset.lineno++;
pset.stmt_lineno++; pset.stmt_lineno++;
} }
at_line_begin = true;
}
else
at_line_begin = false;
}
/*
* If the buffer is full, or we've reached the EOF, flush it.
*
* Make sure there's always space for four more bytes in the
* buffer, plus a NUL terminator. That way, an EOF marker is
* never split across two fgets() calls, which simplifies the logic.
*/
if (buflen >= COPYBUFSIZ - 5 || (copydone && buflen > 0))
{
if (PQputCopyData(conn, buf, buflen) <= 0)
{
OK = false;
copydone = true;
break;
}
buflen = 0;
}
} }
} }