Refactor CopyReadAttributes{CSV,Text}() to use a callback in COPY FROM

CopyReadAttributes{CSV,Text}() are used to parse lines for text and CSV format. The routine matching the format is now selected once, in BeginCopyFrom(), and stored as a callback in the COPY FROM state. This reduces the number of "if" branches that need to be checked when parsing fields in CSV and text mode when dealing with a COPY FROM, something that can become more noticeable with more attributes and more lines to process. Extracted from a larger patch by the same author.

Author: Sutou Kouhei
Discussion: https://postgr.es/m/20231204.153548.2126325458835528809.kou@clear-code.com
2025-10-27 00:12:01 +03:00 · 2024-02-05 09:46:02 +09:00
parent 774bcffe4a
commit 95fb5b4902
3 changed files with 30 additions and 16 deletions
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1776,6 +1776,11 @@ BeginCopyFrom(ParseState *pstate,

 		cstate->max_fields = attr_count;
 		cstate->raw_fields = (char **) palloc(attr_count * sizeof(char *));
+
+		if (cstate->opts.csv_mode)
+			cstate->copy_read_attributes = CopyReadAttributesCSV;
+		else
+			cstate->copy_read_attributes = CopyReadAttributesText;
 	}

 	MemoryContextSwitchTo(oldcontext);
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -25,10 +25,10 @@
 *    is copied into 'line_buf', with quotes and escape characters still
 *    intact.
 *
- * 4. CopyReadAttributesText/CSV() function takes the input line from
- *    'line_buf', and splits it into fields, unescaping the data as required.
- *    The fields are stored in 'attribute_buf', and 'raw_fields' array holds
- *    pointers to each field.
+ * 4. CopyReadAttributesText/CSV() function (via copy_read_attributes) takes
+ *    the input line from 'line_buf', and splits it into fields, unescaping
+ *    the data as required.  The fields are stored in 'attribute_buf', and
+ *    'raw_fields' array holds pointers to each field.
 *
 * If encoding conversion is not required, a shortcut is taken in step 2 to
 * avoid copying the data unnecessarily.  The 'input_buf' pointer is set to
@@ -152,8 +152,6 @@ static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
 /* non-export function prototypes */
 static bool CopyReadLine(CopyFromState cstate);
 static bool CopyReadLineText(CopyFromState cstate);
-static int	CopyReadAttributesText(CopyFromState cstate);
-static int	CopyReadAttributesCSV(CopyFromState cstate);
 static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
 									 Oid typioparam, int32 typmod,
 									 bool *isnull);
@@ -775,10 +773,7 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
 		{
 			int			fldnum;

-			if (cstate->opts.csv_mode)
-				fldct = CopyReadAttributesCSV(cstate);
-			else
-				fldct = CopyReadAttributesText(cstate);
+			fldct = cstate->copy_read_attributes(cstate);

 			if (fldct != list_length(cstate->attnumlist))
 				ereport(ERROR,
@@ -830,10 +825,7 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
 		return false;

 	/* Parse the line into de-escaped field values */
-	if (cstate->opts.csv_mode)
-		fldct = CopyReadAttributesCSV(cstate);
-	else
-		fldct = CopyReadAttributesText(cstate);
+	fldct = cstate->copy_read_attributes(cstate);

 	*fields = cstate->raw_fields;
 	*nfields = fldct;
@@ -1502,7 +1494,7 @@ GetDecimalFromHex(char hex)
 *
 * The return value is the number of fields actually read.
 */
-static int
+int
 CopyReadAttributesText(CopyFromState cstate)
 {
 	char		delimc = cstate->opts.delim[0];
@@ -1756,7 +1748,7 @@ CopyReadAttributesText(CopyFromState cstate)
 * CopyReadAttributesText, except we parse the fields according to
 * "standard" (i.e. common) CSV usage.
 */
-static int
+int
 CopyReadAttributesCSV(CopyFromState cstate)
 {
 	char		delimc = cstate->opts.delim[0];