1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-03 20:02:46 +03:00

Support multi-line headers in COPY FROM command.

The COPY FROM command now accepts a non-negative integer for the HEADER option,
allowing multiple header lines to be skipped. This is useful when the input
contains multi-line headers that should be ignored during data import.

Author: Shinya Kato <shinya11.kato@gmail.com>
Co-authored-by: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: Yugo Nagata <nagata@sraoss.co.jp>
Discussion: https://postgr.es/m/CAOzEurRPxfzbxqeOPF_AGnAUOYf=Wk0we+1LQomPNUNtyZGBZw@mail.gmail.com
This commit is contained in:
Fujii Masao
2025-07-03 15:27:26 +09:00
parent fd7d7b7191
commit bc2f348e87
10 changed files with 138 additions and 42 deletions

View File

@ -37,7 +37,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
NULL '<replaceable class="parameter">null_string</replaceable>'
DEFAULT '<replaceable class="parameter">default_string</replaceable>'
HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ]
HEADER [ <replaceable class="parameter">boolean</replaceable> | <replaceable class="parameter">integer</replaceable> | MATCH ]
QUOTE '<replaceable class="parameter">quote_character</replaceable>'
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
@ -212,6 +212,15 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="parameter">integer</replaceable></term>
<listitem>
<para>
Specifies a non-negative integer value passed to the selected option.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>FORMAT</literal></term>
<listitem>
@ -303,16 +312,25 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
<term><literal>HEADER</literal></term>
<listitem>
<para>
Specifies that the file contains a header line with the names of each
column in the file. On output, the first line contains the column
names from the table. On input, the first line is discarded when this
option is set to <literal>true</literal> (or equivalent Boolean value).
If this option is set to <literal>MATCH</literal>, the number and names
of the columns in the header line must match the actual column names of
the table, in order; otherwise an error is raised.
On output, if this option is set to <literal>true</literal>
(or an equivalent Boolean value), the first line of the output will
contain the column names from the table.
Integer values <literal>0</literal> and <literal>1</literal> are
accepted as Boolean values, but other integers are not allowed for
<command>COPY TO</command> commands.
</para>
<para>
On input, if this option is set to <literal>true</literal>
(or an equivalent Boolean value), the first line of the input is
discarded. If set to a non-negative integer, that number of
lines are discarded. If set to <literal>MATCH</literal>, the first line
is discarded, and it must contain column names that exactly match the
table's columns, in both number and order; otherwise, an error is raised.
The <literal>MATCH</literal> value is only valid for
<command>COPY FROM</command> commands.
</para>
<para>
This option is not allowed when using <literal>binary</literal> format.
The <literal>MATCH</literal> option is only valid for <command>COPY
FROM</command> commands.
</para>
</listitem>
</varlistentry>

View File

@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
}
/*
* Extract a CopyHeaderChoice value from a DefElem. This is like
* defGetBoolean() but also accepts the special value "match".
* Extract the CopyFormatOptions.header_line value from a DefElem.
*
* Parses the HEADER option for COPY, which can be a boolean, a non-negative
* integer (number of lines to skip), or the special value "match".
*/
static CopyHeaderChoice
defGetCopyHeaderChoice(DefElem *def, bool is_from)
static int
defGetCopyHeaderOption(DefElem *def, bool is_from)
{
/*
* If no parameter value given, assume "true" is meant.
@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
return COPY_HEADER_TRUE;
/*
* Allow 0, 1, "true", "false", "on", "off", or "match".
* Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
* "match".
*/
switch (nodeTag(def->arg))
{
case T_Integer:
switch (intVal(def->arg))
{
case 0:
return COPY_HEADER_FALSE;
case 1:
return COPY_HEADER_TRUE;
default:
/* otherwise, error out below */
break;
int ival = intVal(def->arg);
if (ival < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("a negative integer value cannot be "
"specified for %s", def->defname)));
if (!is_from && ival > 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot use multi-line header in COPY TO")));
return ival;
}
break;
default:
@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
}
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("%s requires a Boolean value or \"match\"",
errmsg("%s requires a Boolean value, a non-negative integer, "
"or the string \"match\"",
def->defname)));
return COPY_HEADER_FALSE; /* keep compiler quiet */
}
@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
if (header_specified)
errorConflictingDefElem(defel, pstate);
header_specified = true;
opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
}
else if (strcmp(defel->defname, "quote") == 0)
{
@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
/* Check header */
if (opts_out->binary && opts_out->header_line)
if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */

View File

@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
{
int fldct;
bool done;
bool done = false;
/* only available for text or csv input */
Assert(!cstate->opts.binary);
/* on input, skip any header lines, and check that the header line is correct if needed */
if (cstate->cur_lineno == 0 && cstate->opts.header_line)
if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
{
ListCell *cur;
TupleDesc tupDesc;
int lines_to_skip = cstate->opts.header_line;
/* If set to "match", one header line is skipped */
if (cstate->opts.header_line == COPY_HEADER_MATCH)
lines_to_skip = 1;
tupDesc = RelationGetDescr(cstate->rel);
cstate->cur_lineno++;
done = CopyReadLine(cstate, is_csv);
for (int i = 0; i < lines_to_skip; i++)
{
cstate->cur_lineno++;
if ((done = CopyReadLine(cstate, is_csv)))
break;
}
if (cstate->opts.header_line == COPY_HEADER_MATCH)
{

View File

@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
cstate->file_encoding);
/* if a header has been requested send the line */
if (cstate->opts.header_line)
if (cstate->opts.header_line == COPY_HEADER_TRUE)
{
ListCell *cur;
bool hdr_delim = false;

View File

@ -20,15 +20,12 @@
#include "tcop/dest.h"
/*
* Represents whether a header line should be present, and whether it must
* match the actual names (which implies "true").
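* Special values for CopyFormatOptions.header_line.  COPY_HEADER_MATCH means
* the header line must match the actual column names (which implies that a
* header line is present); COPY_HEADER_FALSE and COPY_HEADER_TRUE indicate
* whether a header line is present at all.  Any other non-negative value is
* the number of header lines to skip.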
*/
typedef enum CopyHeaderChoice
{
COPY_HEADER_FALSE = 0,
COPY_HEADER_TRUE,
COPY_HEADER_MATCH,
} CopyHeaderChoice;
#define COPY_HEADER_MATCH -1
#define COPY_HEADER_FALSE 0
#define COPY_HEADER_TRUE 1
/*
* Represents where to save input processing errors. More values to be added
@ -64,7 +61,8 @@ typedef struct CopyFormatOptions
bool binary; /* binary format? */
bool freeze; /* freeze rows on loading? */
bool csv_mode; /* Comma Separated Value format? */
CopyHeaderChoice header_line; /* header line? */
int header_line; /* number of lines to skip or COPY_HEADER_XXX
* value (see above) */
char *null_print; /* NULL marker string (server encoding!) */
int null_print_len; /* length of same */
char *null_print_client; /* same converted to file encoding */

View File

@ -81,6 +81,29 @@ copy copytest4 to stdout (header);
c1 colname with tab: \t
1 a
2 b
-- test multi-line header line feature
create temp table copytest5 (c1 int);
copy copytest5 from stdin (format csv, header 2);
copy copytest5 to stdout (header);
c1
1
2
truncate copytest5;
copy copytest5 from stdin (format csv, header 4);
select count(*) from copytest5;
count
-------
0
(1 row)
truncate copytest5;
copy copytest5 from stdin (format csv, header 5);
select count(*) from copytest5;
count
-------
0
(1 row)
-- test copy from with a partitioned table
create table parted_copytest (
a int,
@ -224,7 +247,7 @@ alter table header_copytest add column c text;
copy header_copytest to stdout with (header match);
ERROR: cannot use "match" with HEADER in COPY TO
copy header_copytest from stdin with (header wrong_choice);
ERROR: header requires a Boolean value or "match"
ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
-- works
copy header_copytest from stdin with (header match);
copy header_copytest (c, a, b) from stdin with (header match);

View File

@ -132,6 +132,12 @@ COPY x from stdin with (reject_limit 1);
ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
COPY x from stdin with (on_error ignore, reject_limit 0);
ERROR: REJECT_LIMIT (0) must be greater than zero
COPY x from stdin with (header -1);
ERROR: a negative integer value cannot be specified for header
COPY x from stdin with (header 2.5);
ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
COPY x to stdout with (header 2);
ERROR: cannot use multi-line header in COPY TO
-- too many columns in column list: should fail
COPY x (a, b, c, d, e, d, c) from stdin;
ERROR: column "d" specified more than once

View File

@ -94,6 +94,36 @@ this is just a line full of junk that would error out if parsed
copy copytest4 to stdout (header);
-- test multi-line header line feature
create temp table copytest5 (c1 int);
copy copytest5 from stdin (format csv, header 2);
this is a first header line.
this is a second header line.
1
2
\.
copy copytest5 to stdout (header);
truncate copytest5;
copy copytest5 from stdin (format csv, header 4);
this is a first header line.
this is a second header line.
1
2
\.
select count(*) from copytest5;
truncate copytest5;
copy copytest5 from stdin (format csv, header 5);
this is a first header line.
this is a second header line.
1
2
\.
select count(*) from copytest5;
-- test copy from with a partitioned table
create table parted_copytest (
a int,

View File

@ -90,6 +90,9 @@ COPY x to stdout (format BINARY, on_error unsupported);
COPY x from stdin (log_verbosity unsupported);
COPY x from stdin with (reject_limit 1);
COPY x from stdin with (on_error ignore, reject_limit 0);
COPY x from stdin with (header -1);
COPY x from stdin with (header 2.5);
COPY x to stdout with (header 2);
-- too many columns in column list: should fail
COPY x (a, b, c, d, e, d, c) from stdin;

View File

@ -521,7 +521,6 @@ CopyFormatOptions
CopyFromRoutine
CopyFromState
CopyFromStateData
CopyHeaderChoice
CopyInsertMethod
CopyLogVerbosityChoice
CopyMethod