In psql \copy from, send data to server in larger chunks.

Previously, we would send each line as a separate CopyData message. That's pretty wasteful if the table is narrow, as each CopyData message has 5 bytes of overhead. For efficiency, buffer up and pack 8 kB of input data into each CopyData message.

The server also sends each line as a separate CopyData message in COPY TO STDOUT, and that's similarly wasteful. But that's documented in the FE/BE protocol description, so changing that would be a wire protocol break.

Reviewed-by: Aleksander Alekseev
Discussion: https://www.postgresql.org/message-id/40b2cec0-d0fb-3191-2ae1-9a3fe16a7e48%40iki.fi
2025-08-05 07:41:25 +03:00 · 2021-07-14 13:08:28 +03:00
parent b4deefc39b
commit eec57115e4
1 changed files with 57 additions and 44 deletions
--- a/src/bin/psql/copy.c
+++ b/src/bin/psql/copy.c
@@ -581,13 +581,21 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
 	else
 	{
 		bool		copydone = false;
 		int			buflen;
 		bool		at_line_begin = true;
 		/*
 		 * In text mode, we have to read the input one line at a time, so that
 		 * we can stop reading at the EOF marker (\.).  We mustn't read beyond
 		 * the EOF marker, because if the data was inlined in a SQL script, we
 		 * would eat up the commands after the EOF marker.
 		 */
 		buflen = 0;
 		while (!copydone)
-		{						/* for each input line ... */
+		{
-			bool		firstload;
+			char	   *fgresult;
 			bool		linedone;
-			if (showprompt)
+			if (at_line_begin && showprompt)
 			{
 				const char *prompt = get_prompt(PROMPT_COPY, NULL);
@@ -595,56 +603,37 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
 				fflush(stdout);
 			}
 			firstload = true;
 			linedone = false;
 			while (!linedone)
 			{					/* for each bufferload in line ... */
 				int			linelen;
 				char	   *fgresult;
 			/* enable longjmp while waiting for input */
 			sigint_interrupt_enabled = true;
-				fgresult = fgets(buf, sizeof(buf), copystream);
+			fgresult = fgets(&buf[buflen], COPYBUFSIZ - buflen, copystream);
 			sigint_interrupt_enabled = false;
 			if (!fgresult)
 				{
 				copydone = true;
-					break;
+			else
-				}
+			{
 				int			linelen;
-				linelen = strlen(buf);
+				linelen = strlen(fgresult);
 				buflen += linelen;
 				/* current line is done? */
-				if (linelen > 0 && buf[linelen - 1] == '\n')
+				if (buf[buflen - 1] == '\n')
-					linedone = true;
+				{
 					/* check for EOF marker, but not on a partial line */
-				if (firstload)
+					if (at_line_begin)
 					{
 						/*
 						 * This code erroneously assumes '\.' on a line alone
 						 * inside a quoted CSV string terminates the \copy.
 						 * https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org
 						 */
-					if (strcmp(buf, "\\.\n") == 0 ||
+						if ((linelen == 3 && memcmp(fgresult, "\\.\n", 3) == 0) ||
-						strcmp(buf, "\\.\r\n") == 0)
+							(linelen == 4 && memcmp(fgresult, "\\.\r\n", 4) == 0))
 						{
 							copydone = true;
 						break;
 					}
 					firstload = false;
 				}
 				if (PQputCopyData(conn, buf, linelen) <= 0)
 				{
 					OK = false;
 					copydone = true;
 					break;
 						}
 					}
@@ -653,6 +642,30 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
 						pset.lineno++;
 						pset.stmt_lineno++;
 					}
 					at_line_begin = true;
 				}
 				else
 					at_line_begin = false;
 			}
 			/*
 			 * If the buffer is full, or we've reached the EOF, flush it.
 			 *
 			 * Make sure there's always space for four more bytes in the
 			 * buffer, plus a NUL terminator.  That way, an EOF marker is
 			 * never split across two fgets() calls, which simplifies the logic.
 			 */
 			if (buflen >= COPYBUFSIZ - 5 || (copydone && buflen > 0))
 			{
 				if (PQputCopyData(conn, buf, buflen) <= 0)
 				{
 					OK = false;
 					copydone = true;
 					break;
 				}
 				buflen = 0;
 			}
 		}
 	}