1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Support multi-line headers in COPY FROM command.

The COPY FROM command now accepts a non-negative integer for the HEADER option,
allowing multiple header lines to be skipped. This is useful when the input
contains multi-line headers that should be ignored during data import.

Author: Shinya Kato <shinya11.kato@gmail.com>
Co-authored-by: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: Yugo Nagata <nagata@sraoss.co.jp>
Discussion: https://postgr.es/m/CAOzEurRPxfzbxqeOPF_AGnAUOYf=Wk0we+1LQomPNUNtyZGBZw@mail.gmail.com
This commit is contained in:
Fujii Masao
2025-07-03 15:27:26 +09:00
parent fd7d7b7191
commit bc2f348e87
10 changed files with 138 additions and 42 deletions

View File

@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
}
/*
* Extract a CopyHeaderChoice value from a DefElem. This is like
* defGetBoolean() but also accepts the special value "match".
* Extract the CopyFormatOptions.header_line value from a DefElem.
*
* Parses the HEADER option for COPY, which can be a boolean, a non-negative
* integer (number of lines to skip), or the special value "match".
*/
static CopyHeaderChoice
defGetCopyHeaderChoice(DefElem *def, bool is_from)
static int
defGetCopyHeaderOption(DefElem *def, bool is_from)
{
/*
* If no parameter value given, assume "true" is meant.
@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
return COPY_HEADER_TRUE;
/*
* Allow 0, 1, "true", "false", "on", "off", or "match".
* Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
* "match".
*/
switch (nodeTag(def->arg))
{
case T_Integer:
switch (intVal(def->arg))
{
case 0:
return COPY_HEADER_FALSE;
case 1:
return COPY_HEADER_TRUE;
default:
/* otherwise, error out below */
break;
int ival = intVal(def->arg);
if (ival < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("a negative integer value cannot be "
"specified for %s", def->defname)));
if (!is_from && ival > 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot use multi-line header in COPY TO")));
return ival;
}
break;
default:
@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
}
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("%s requires a Boolean value or \"match\"",
errmsg("%s requires a Boolean value, a non-negative integer, "
"or the string \"match\"",
def->defname)));
return COPY_HEADER_FALSE; /* keep compiler quiet */
}
@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
if (header_specified)
errorConflictingDefElem(defel, pstate);
header_specified = true;
opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
}
else if (strcmp(defel->defname, "quote") == 0)
{
@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
/* Check header */
if (opts_out->binary && opts_out->header_line)
if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */

View File

@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
{
int fldct;
bool done;
bool done = false;
/* only available for text or csv input */
Assert(!cstate->opts.binary);
/* on input, skip the header line(s) and check that the header line is correct if needed */
if (cstate->cur_lineno == 0 && cstate->opts.header_line)
if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
{
ListCell *cur;
TupleDesc tupDesc;
int lines_to_skip = cstate->opts.header_line;
/* If set to "match", one header line is skipped */
if (cstate->opts.header_line == COPY_HEADER_MATCH)
lines_to_skip = 1;
tupDesc = RelationGetDescr(cstate->rel);
cstate->cur_lineno++;
done = CopyReadLine(cstate, is_csv);
for (int i = 0; i < lines_to_skip; i++)
{
cstate->cur_lineno++;
if ((done = CopyReadLine(cstate, is_csv)))
break;
}
if (cstate->opts.header_line == COPY_HEADER_MATCH)
{

View File

@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
cstate->file_encoding);
/* if a header has been requested send the line */
if (cstate->opts.header_line)
if (cstate->opts.header_line == COPY_HEADER_TRUE)
{
ListCell *cur;
bool hdr_delim = false;

View File

@ -20,15 +20,12 @@
#include "tcop/dest.h"
/*
* Represents whether a header line should be present, and whether it must
* match the actual names (which implies "true").
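* Special values for CopyFormatOptions.header_line. They represent whether
* a header line must match the actual names (which implies "true"), and
* whether it should be present. A positive header_line value is the number
* of header lines to skip on input.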
*/
typedef enum CopyHeaderChoice
{
COPY_HEADER_FALSE = 0,
COPY_HEADER_TRUE,
COPY_HEADER_MATCH,
} CopyHeaderChoice;
#define COPY_HEADER_MATCH -1
#define COPY_HEADER_FALSE 0
#define COPY_HEADER_TRUE 1
/*
* Represents where to save input processing errors. More values to be added
@ -64,7 +61,8 @@ typedef struct CopyFormatOptions
bool binary; /* binary format? */
bool freeze; /* freeze rows on loading? */
bool csv_mode; /* Comma Separated Value format? */
CopyHeaderChoice header_line; /* header line? */
int header_line; /* number of lines to skip or COPY_HEADER_XXX
* value (see the above) */
char *null_print; /* NULL marker string (server encoding!) */
int null_print_len; /* length of same */
char *null_print_client; /* same converted to file encoding */

View File

@ -81,6 +81,29 @@ copy copytest4 to stdout (header);
c1 colname with tab: \t
1 a
2 b
-- test multi-line header line feature
create temp table copytest5 (c1 int);
copy copytest5 from stdin (format csv, header 2);
copy copytest5 to stdout (header);
c1
1
2
truncate copytest5;
copy copytest5 from stdin (format csv, header 4);
select count(*) from copytest5;
count
-------
0
(1 row)
truncate copytest5;
copy copytest5 from stdin (format csv, header 5);
select count(*) from copytest5;
count
-------
0
(1 row)
-- test copy from with a partitioned table
create table parted_copytest (
a int,
@ -224,7 +247,7 @@ alter table header_copytest add column c text;
copy header_copytest to stdout with (header match);
ERROR: cannot use "match" with HEADER in COPY TO
copy header_copytest from stdin with (header wrong_choice);
ERROR: header requires a Boolean value or "match"
ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
-- works
copy header_copytest from stdin with (header match);
copy header_copytest (c, a, b) from stdin with (header match);

View File

@ -132,6 +132,12 @@ COPY x from stdin with (reject_limit 1);
ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
COPY x from stdin with (on_error ignore, reject_limit 0);
ERROR: REJECT_LIMIT (0) must be greater than zero
COPY x from stdin with (header -1);
ERROR: a negative integer value cannot be specified for header
COPY x from stdin with (header 2.5);
ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
COPY x to stdout with (header 2);
ERROR: cannot use multi-line header in COPY TO
-- too many columns in column list: should fail
COPY x (a, b, c, d, e, d, c) from stdin;
ERROR: column "d" specified more than once

View File

@ -94,6 +94,36 @@ this is just a line full of junk that would error out if parsed
copy copytest4 to stdout (header);
-- test multi-line header line feature
create temp table copytest5 (c1 int);
copy copytest5 from stdin (format csv, header 2);
this is a first header line.
this is a second header line.
1
2
\.
copy copytest5 to stdout (header);
truncate copytest5;
copy copytest5 from stdin (format csv, header 4);
this is a first header line.
this is a second header line.
1
2
\.
select count(*) from copytest5;
truncate copytest5;
copy copytest5 from stdin (format csv, header 5);
this is a first header line.
this is a second header line.
1
2
\.
select count(*) from copytest5;
-- test copy from with a partitioned table
create table parted_copytest (
a int,

View File

@ -90,6 +90,9 @@ COPY x to stdout (format BINARY, on_error unsupported);
COPY x from stdin (log_verbosity unsupported);
COPY x from stdin with (reject_limit 1);
COPY x from stdin with (on_error ignore, reject_limit 0);
COPY x from stdin with (header -1);
COPY x from stdin with (header 2.5);
COPY x to stdout with (header 2);
-- too many columns in column list: should fail
COPY x (a, b, c, d, e, d, c) from stdin;

View File

@ -521,7 +521,6 @@ CopyFormatOptions
CopyFromRoutine
CopyFromState
CopyFromStateData
CopyHeaderChoice
CopyInsertMethod
CopyLogVerbosityChoice
CopyMethod