From 26cb14aea12a0f0c2f9a49de3865721936b711a7 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Thu, 22 Jan 2026 10:14:12 +0900 Subject: [PATCH] file_fdw: Support multi-line HEADER option. Commit bc2f348 introduced multi-line HEADER support for COPY. This commit extends this capability to file_fdw, allowing multiple header lines to be skipped. Because CREATE/ALTER FOREIGN TABLE requires option values to be single-quoted, this commit also updates defGetCopyHeaderOption() to accept integer values specified as strings for HEADER option. Author: Shinya Kato Reviewed-by: Fujii Masao Reviewed-by: songjinzhou Reviewed-by: Japin Li Reviewed-by: Chao Li Discussion: https://postgr.es/m/CAOzEurT+iwC47VHPMS+uJ4WSzvOLPsZ2F2_wopm8M7O+CZa3Xw@mail.gmail.com --- contrib/file_fdw/data/multiline_header.csv | 4 ++ contrib/file_fdw/expected/file_fdw.out | 29 +++++++++- contrib/file_fdw/sql/file_fdw.sql | 13 +++++ doc/src/sgml/file-fdw.sgml | 4 +- src/backend/commands/copy.c | 62 +++++++++++++--------- src/test/regress/expected/copy.out | 18 +++++++ src/test/regress/expected/copy2.out | 6 +++ src/test/regress/sql/copy.sql | 15 ++++++ src/test/regress/sql/copy2.sql | 3 ++ 9 files changed, 125 insertions(+), 29 deletions(-) create mode 100644 contrib/file_fdw/data/multiline_header.csv diff --git a/contrib/file_fdw/data/multiline_header.csv b/contrib/file_fdw/data/multiline_header.csv new file mode 100644 index 00000000000..0d5e482a1e4 --- /dev/null +++ b/contrib/file_fdw/data/multiline_header.csv @@ -0,0 +1,4 @@ +first header line +second header line +1,alpha +2,beta diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out index 5121e27dce5..fde7e12a209 100644 --- a/contrib/file_fdw/expected/file_fdw.out +++ b/contrib/file_fdw/expected/file_fdw.out @@ -104,6 +104,12 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (reject_limit '1'); ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (on_error 'ignore', reject_limit '0'); -- ERROR ERROR: REJECT_LIMIT (0) must be greater than zero +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header '-1'); -- ERROR +ERROR: a negative integer value cannot be specified for header +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header '2.5'); -- ERROR +ERROR: header requires a Boolean value, an integer value greater than or equal to zero, or the string "match" +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header 'unsupported'); -- ERROR +ERROR: header requires a Boolean value, an integer value greater than or equal to zero, or the string "match" CREATE FOREIGN TABLE tbl () SERVER file_server; -- ERROR ERROR: either filename or program is required for file_fdw foreign tables \set filename :abs_srcdir '/data/agg.data' @@ -142,6 +148,25 @@ OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match'); SELECT * FROM header_doesnt_match; -- ERROR ERROR: column name mismatch in header line field 1: got "1", expected "a" CONTEXT: COPY header_doesnt_match, line 1: "1,foo" +-- test multi-line header +\set filename :abs_srcdir '/data/multiline_header.csv' +CREATE FOREIGN TABLE multi_header (a int, b text) SERVER file_server +OPTIONS (format 'csv', filename :'filename', header '2'); +SELECT * FROM multi_header ORDER BY a; + a | b +---+------- + 1 | alpha + 2 | beta +(2 rows) + +CREATE FOREIGN TABLE multi_header_skip (a int, b text) SERVER file_server +OPTIONS (format 'csv', filename :'filename', header '5'); +SELECT count(*) FROM multi_header_skip; + count +------- + 0 +(1 row) + -- per-column options tests \set filename :abs_srcdir '/data/text.csv' CREATE FOREIGN TABLE text_csv ( @@ -543,7 +568,7 @@ SET ROLE regress_file_fdw_superuser; -- cleanup RESET ROLE; DROP EXTENSION file_fdw CASCADE; -NOTICE: drop cascades to 9 other objects +NOTICE: drop cascades to 11 other objects DETAIL: drop cascades to server file_server drop cascades to user mapping for regress_file_fdw_superuser on server file_server drop cascades to user mapping for regress_no_priv_user on server file_server @@ -552,5 +577,7 @@ drop cascades to foreign table agg_csv drop cascades to foreign table agg_bad drop cascades to foreign table header_match drop cascades to foreign table header_doesnt_match +drop cascades to foreign table multi_header +drop cascades to foreign table multi_header_skip drop cascades to foreign table text_csv DROP ROLE regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user; diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql index 1a397ad4bd1..408affcf87f 100644 --- a/contrib/file_fdw/sql/file_fdw.sql +++ b/contrib/file_fdw/sql/file_fdw.sql @@ -84,6 +84,9 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', on_erro CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (log_verbosity 'unsupported'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (reject_limit '1'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (on_error 'ignore', reject_limit '0'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header '-1'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header '2.5'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header 'unsupported'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server; -- ERROR \set filename :abs_srcdir '/data/agg.data' @@ -119,6 +122,16 @@ CREATE FOREIGN TABLE header_doesnt_match (a int, foo text) SERVER file_server OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match'); SELECT * FROM header_doesnt_match; -- ERROR +-- test multi-line header +\set filename :abs_srcdir '/data/multiline_header.csv' +CREATE FOREIGN TABLE multi_header (a int, b text) SERVER file_server +OPTIONS (format 'csv', filename :'filename', header '2'); +SELECT * FROM multi_header ORDER BY a; + +CREATE FOREIGN TABLE multi_header_skip (a int, b text) SERVER file_server +OPTIONS (format 'csv', filename :'filename', header '5'); +SELECT count(*) FROM multi_header_skip; + -- per-column options tests \set filename :abs_srcdir '/data/text.csv' CREATE FOREIGN TABLE text_csv ( diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml index e3fe796b897..3638689436f 100644 --- a/doc/src/sgml/file-fdw.sgml +++ b/doc/src/sgml/file-fdw.sgml @@ -65,7 +65,7 @@ - Specifies whether the data has a header line, + Specifies whether to skip a header line, or how many header lines to skip, the same as COPY's HEADER option. @@ -179,7 +179,7 @@ to be specified without a corresponding value, the foreign table option syntax requires a value to be present in all cases. To activate COPY options typically written without a value, you can pass - the value TRUE, since all such options are Booleans. + the value TRUE, since all such options accept Booleans. diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 1f6b24d66f8..155a79a70c5 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -28,6 +28,7 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "nodes/miscnodes.h" #include "optimizer/optimizer.h" #include "parser/parse_coerce.h" #include "parser/parse_collate.h" @@ -374,6 +375,8 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt, static int defGetCopyHeaderOption(DefElem *def, bool is_from) { + int ival = COPY_HEADER_FALSE; + /* * If no parameter value given, assume "true" is meant. */ @@ -381,28 +384,14 @@ defGetCopyHeaderOption(DefElem *def, bool is_from) return COPY_HEADER_TRUE; /* - * Allow an integer value greater than or equal to zero, "true", "false", - * "on", "off", or "match". + * Allow an integer value greater than or equal to zero (integers + * specified as strings are also accepted, mainly for file_fdw foreign + * table options), "true", "false", "on", "off", or "match". */ switch (nodeTag(def->arg)) { case T_Integer: - { - int ival = intVal(def->arg); - - if (ival < 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("a negative integer value cannot be " - "specified for %s", def->defname))); - - if (!is_from && ival > 1) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot use multi-line header in COPY TO"))); - - return ival; - } + ival = intVal(def->arg); break; default: { @@ -429,17 +418,38 @@ defGetCopyHeaderOption(DefElem *def, bool is_from) sval))); return COPY_HEADER_MATCH; } + else + { + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + /* Check if the header is a valid integer */ + ival = pg_strtoint32_safe(sval, (Node *) &escontext); + if (escontext.error_occurred) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + /*- translator: first %s is the name of a COPY option, e.g. ON_ERROR, + second %s is the special value "match" for that option */ + errmsg("%s requires a Boolean value, an integer " + "value greater than or equal to zero, " + "or the string \"%s\"", + def->defname, "match"))); + } } break; } - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - /*- translator: first %s is the name of a COPY option, e.g. ON_ERROR, - second %s is the special value "match" for that option */ - errmsg("%s requires a Boolean value, an integer value greater " - "than or equal to zero, or the string \"%s\"", - def->defname, "match"))); - return COPY_HEADER_FALSE; /* keep compiler quiet */ + + if (ival < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("a negative integer value cannot be " + "specified for %s", def->defname))); + + if (!is_from && ival > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use multi-line header in COPY TO"))); + + return ival; } /* diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index 797fae0e650..d0d563e0fa8 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -104,6 +104,24 @@ select count(*) from copytest5; 0 (1 row) +-- test header line feature (given as strings) +truncate copytest5; +copy copytest5 from stdin (format csv, header '0'); +select * from copytest5 order by c1; + c1 +---- + 1 + 2 +(2 rows) + +truncate copytest5; +copy copytest5 from stdin (format csv, header '1'); +select * from copytest5 order by c1; + c1 +---- + 2 +(1 row) + -- test copy from with a partitioned table create table parted_copytest ( a int, diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 9c622e760a3..3145b314e48 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -138,6 +138,12 @@ COPY x from stdin with (header 2.5); ERROR: header requires a Boolean value, an integer value greater than or equal to zero, or the string "match" COPY x to stdout with (header 2); ERROR: cannot use multi-line header in COPY TO +COPY x to stdout with (header '-1'); +ERROR: a negative integer value cannot be specified for header +COPY x from stdin with (header '2.5'); +ERROR: header requires a Boolean value, an integer value greater than or equal to zero, or the string "match" +COPY x to stdout with (header '2'); +ERROR: cannot use multi-line header in COPY TO -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin; ERROR: column "d" specified more than once diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index 676a8b342b5..65cbdaf7f3e 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -124,6 +124,21 @@ this is a second header line. \. select count(*) from copytest5; +-- test header line feature (given as strings) +truncate copytest5; +copy copytest5 from stdin (format csv, header '0'); +1 +2 +\. +select * from copytest5 order by c1; + +truncate copytest5; +copy copytest5 from stdin (format csv, header '1'); +1 +2 +\. +select * from copytest5 order by c1; + -- test copy from with a partitioned table create table parted_copytest ( a int, diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index cef45868db5..66435167500 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -93,6 +93,9 @@ COPY x from stdin with (on_error ignore, reject_limit 0); COPY x from stdin with (header -1); COPY x from stdin with (header 2.5); COPY x to stdout with (header 2); +COPY x to stdout with (header '-1'); +COPY x from stdin with (header '2.5'); +COPY x to stdout with (header '2'); -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin;