In psql \copy from, send data to server in larger chunks.

Previously, we would send each line as a separate CopyData message. That's pretty wasteful if the table is narrow, as each CopyData message has 5 bytes of overhead. For efficiency, buffer up and pack 8 kB of input data into each CopyData message. The server also sends each line as a separate CopyData message in COPY TO STDOUT, and that's similarly wasteful. But that's documented in the FE/BE protocol description, so changing that would be a wire protocol break. Reviewed-by: Aleksander Alekseev Discussion: https://www.postgresql.org/message-id/40b2cec0-d0fb-3191-2ae1-9a3fe16a7e48%40iki.fi
2025-07-30 11:03:19 +03:00 · 2021-07-14 13:08:28 +03:00
parent b4deefc39b
commit eec57115e4
1 changed files with 57 additions and 44 deletions
--- a/src/bin/psql/copy.c
+++ b/src/bin/psql/copy.c
@ -581,13 +581,21 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
 	else
 	{
 		bool		copydone = false;
+		int			buflen;
+		bool		at_line_begin = true;

+		/*
+		 * In text mode, we have to read the input one line at a time, so that
+		 * we can stop reading at the EOF marker (\.).  We mustn't read beyond
+		 * the EOF marker, because if the data was inlined in a SQL script, we
+		 * would eat up the commands after the EOF marker.
+		 */
+		buflen = 0;
 		while (!copydone)
-		{						/* for each input line ... */
-			bool		firstload;
-			bool		linedone;
+		{
+			char	   *fgresult;

-			if (showprompt)
+			if (at_line_begin && showprompt)
 			{
 				const char *prompt = get_prompt(PROMPT_COPY, NULL);

@ -595,63 +603,68 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
 				fflush(stdout);
 			}

-			firstload = true;
-			linedone = false;
+			/* enable longjmp while waiting for input */
+			sigint_interrupt_enabled = true;

-			while (!linedone)
-			{					/* for each bufferload in line ... */
+			fgresult = fgets(&buf[buflen], COPYBUFSIZ - buflen, copystream);
+
+			sigint_interrupt_enabled = false;
+
+			if (!fgresult)
+				copydone = true;
+			else
+			{
 				int			linelen;
-				char	   *fgresult;

-				/* enable longjmp while waiting for input */
-				sigint_interrupt_enabled = true;
-
-				fgresult = fgets(buf, sizeof(buf), copystream);
-
-				sigint_interrupt_enabled = false;
-
-				if (!fgresult)
-				{
-					copydone = true;
-					break;
-				}
-
-				linelen = strlen(buf);
+				linelen = strlen(fgresult);
+				buflen += linelen;

 				/* current line is done? */
-				if (linelen > 0 && buf[linelen - 1] == '\n')
-					linedone = true;
-
-				/* check for EOF marker, but not on a partial line */
-				if (firstload)
+				if (buf[buflen - 1] == '\n')
 				{
-					/*
-					 * This code erroneously assumes '\.' on a line alone
-					 * inside a quoted CSV string terminates the \copy.
-					 * https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org
-					 */
-					if (strcmp(buf, "\\.\n") == 0 ||
-						strcmp(buf, "\\.\r\n") == 0)
+					/* check for EOF marker, but not on a partial line */
+					if (at_line_begin)
 					{
-						copydone = true;
-						break;
+						/*
+						 * This code erroneously assumes '\.' on a line alone
+						 * inside a quoted CSV string terminates the \copy.
+						 * https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org
+						 */
+						if ((linelen == 3 && memcmp(fgresult, "\\.\n", 3) == 0) ||
+							(linelen == 4 && memcmp(fgresult, "\\.\r\n", 4) == 0))
+						{
+							copydone = true;
+						}
 					}

-					firstload = false;
+					if (copystream == pset.cur_cmd_source)
+					{
+						pset.lineno++;
+						pset.stmt_lineno++;
+					}
+					at_line_begin = true;
 				}
+				else
+					at_line_begin = false;
+			}

-				if (PQputCopyData(conn, buf, linelen) <= 0)
+			/*
+			 * If the buffer is full, or we've reached the EOF, flush it.
+			 *
+			 * Make sure there's always space for four more bytes in the
+			 * buffer, plus a NUL terminator.  That way, an EOF marker is
+			 * never split across two fgets() calls, which simplifies the logic.
+			 */
+			if (buflen >= COPYBUFSIZ - 5 || (copydone && buflen > 0))
+			{
+				if (PQputCopyData(conn, buf, buflen) <= 0)
 				{
 					OK = false;
 					copydone = true;
 					break;
 				}
-			}

-			if (copystream == pset.cur_cmd_source)
-			{
-				pset.lineno++;
-				pset.stmt_lineno++;
+				buflen = 0;
 			}
 		}
 	}