1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-03 20:02:46 +03:00

Support multi-line headers in COPY FROM command.

The COPY FROM command now accepts a non-negative integer for the HEADER option,
allowing multiple header lines to be skipped. This is useful when the input
contains multi-line headers that should be ignored during data import.

Author: Shinya Kato <shinya11.kato@gmail.com>
Co-authored-by: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: Yugo Nagata <nagata@sraoss.co.jp>
Discussion: https://postgr.es/m/CAOzEurRPxfzbxqeOPF_AGnAUOYf=Wk0we+1LQomPNUNtyZGBZw@mail.gmail.com
This commit is contained in:
Fujii Masao
2025-07-03 15:27:26 +09:00
parent fd7d7b7191
commit bc2f348e87
10 changed files with 138 additions and 42 deletions

View File

@ -37,7 +37,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>' DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
NULL '<replaceable class="parameter">null_string</replaceable>' NULL '<replaceable class="parameter">null_string</replaceable>'
DEFAULT '<replaceable class="parameter">default_string</replaceable>' DEFAULT '<replaceable class="parameter">default_string</replaceable>'
HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ] HEADER [ <replaceable class="parameter">boolean</replaceable> | <replaceable class="parameter">integer</replaceable> | MATCH ]
QUOTE '<replaceable class="parameter">quote_character</replaceable>' QUOTE '<replaceable class="parameter">quote_character</replaceable>'
ESCAPE '<replaceable class="parameter">escape_character</replaceable>' ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * } FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
@ -212,6 +212,15 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry>
<term><replaceable class="parameter">integer</replaceable></term>
<listitem>
<para>
Specifies a non-negative integer value passed to the selected option.
</para>
</listitem>
</varlistentry>
<varlistentry> <varlistentry>
<term><literal>FORMAT</literal></term> <term><literal>FORMAT</literal></term>
<listitem> <listitem>
@ -303,16 +312,25 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
<term><literal>HEADER</literal></term> <term><literal>HEADER</literal></term>
<listitem> <listitem>
<para> <para>
Specifies that the file contains a header line with the names of each On output, if this option is set to <literal>true</literal>
column in the file. On output, the first line contains the column (or an equivalent Boolean value), the first line of the output will
names from the table. On input, the first line is discarded when this contain the column names from the table.
option is set to <literal>true</literal> (or equivalent Boolean value). Integer values <literal>0</literal> and <literal>1</literal> are
If this option is set to <literal>MATCH</literal>, the number and names accepted as Boolean values, but other integers are not allowed for
of the columns in the header line must match the actual column names of <command>COPY TO</command> commands.
the table, in order; otherwise an error is raised. </para>
<para>
On input, if this option is set to <literal>true</literal>
(or an equivalent Boolean value), the first line of the input is
discarded. If set to a non-negative integer, that many
lines are discarded. If set to <literal>MATCH</literal>, the first line
is discarded, and it must contain column names that exactly match the
table's columns, in both number and order; otherwise, an error is raised.
The <literal>MATCH</literal> value is only valid for
<command>COPY FROM</command> commands.
</para>
<para>
This option is not allowed when using <literal>binary</literal> format. This option is not allowed when using <literal>binary</literal> format.
The <literal>MATCH</literal> option is only valid for <command>COPY
FROM</command> commands.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>

View File

@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
} }
/* /*
* Extract a CopyHeaderChoice value from a DefElem. This is like * Extract the CopyFormatOptions.header_line value from a DefElem.
* defGetBoolean() but also accepts the special value "match". *
* Parses the HEADER option for COPY, which can be a boolean, a non-negative
* integer (number of lines to skip), or the special value "match".
*/ */
static CopyHeaderChoice static int
defGetCopyHeaderChoice(DefElem *def, bool is_from) defGetCopyHeaderOption(DefElem *def, bool is_from)
{ {
/* /*
* If no parameter value given, assume "true" is meant. * If no parameter value given, assume "true" is meant.
@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
return COPY_HEADER_TRUE; return COPY_HEADER_TRUE;
/* /*
* Allow 0, 1, "true", "false", "on", "off", or "match". * Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
* "match".
*/ */
switch (nodeTag(def->arg)) switch (nodeTag(def->arg))
{ {
case T_Integer: case T_Integer:
switch (intVal(def->arg))
{ {
case 0: int ival = intVal(def->arg);
return COPY_HEADER_FALSE;
case 1: if (ival < 0)
return COPY_HEADER_TRUE; ereport(ERROR,
default: (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
/* otherwise, error out below */ errmsg("a negative integer value cannot be "
break; "specified for %s", def->defname)));
if (!is_from && ival > 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot use multi-line header in COPY TO")));
return ival;
} }
break; break;
default: default:
@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
} }
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("%s requires a Boolean value or \"match\"", errmsg("%s requires a Boolean value, a non-negative integer, "
"or the string \"match\"",
def->defname))); def->defname)));
return COPY_HEADER_FALSE; /* keep compiler quiet */ return COPY_HEADER_FALSE; /* keep compiler quiet */
} }
@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
if (header_specified) if (header_specified)
errorConflictingDefElem(defel, pstate); errorConflictingDefElem(defel, pstate);
header_specified = true; header_specified = true;
opts_out->header_line = defGetCopyHeaderChoice(defel, is_from); opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
} }
else if (strcmp(defel->defname, "quote") == 0) else if (strcmp(defel->defname, "quote") == 0)
{ {
@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim))); errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
/* Check header */ /* Check header */
if (opts_out->binary && opts_out->header_line) if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */

View File

@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv) NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
{ {
int fldct; int fldct;
bool done; bool done = false;
/* only available for text or csv input */ /* only available for text or csv input */
Assert(!cstate->opts.binary); Assert(!cstate->opts.binary);
/* on input check that the header line is correct if needed */ /* on input check that the header line is correct if needed */
if (cstate->cur_lineno == 0 && cstate->opts.header_line) if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
{ {
ListCell *cur; ListCell *cur;
TupleDesc tupDesc; TupleDesc tupDesc;
int lines_to_skip = cstate->opts.header_line;
/* If set to "match", one header line is skipped */
if (cstate->opts.header_line == COPY_HEADER_MATCH)
lines_to_skip = 1;
tupDesc = RelationGetDescr(cstate->rel); tupDesc = RelationGetDescr(cstate->rel);
cstate->cur_lineno++; for (int i = 0; i < lines_to_skip; i++)
done = CopyReadLine(cstate, is_csv); {
cstate->cur_lineno++;
if ((done = CopyReadLine(cstate, is_csv)))
break;
}
if (cstate->opts.header_line == COPY_HEADER_MATCH) if (cstate->opts.header_line == COPY_HEADER_MATCH)
{ {

View File

@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
cstate->file_encoding); cstate->file_encoding);
/* if a header has been requested send the line */ /* if a header has been requested send the line */
if (cstate->opts.header_line) if (cstate->opts.header_line == COPY_HEADER_TRUE)
{ {
ListCell *cur; ListCell *cur;
bool hdr_delim = false; bool hdr_delim = false;

View File

@ -20,15 +20,12 @@
#include "tcop/dest.h" #include "tcop/dest.h"
/* /*
* Represents whether a header line should be present, and whether it must * Represents whether a header line must match the actual names
* match the actual names (which implies "true"). * (which implies "true"), and whether it should be present.
*/ */
typedef enum CopyHeaderChoice #define COPY_HEADER_MATCH -1
{ #define COPY_HEADER_FALSE 0
COPY_HEADER_FALSE = 0, #define COPY_HEADER_TRUE 1
COPY_HEADER_TRUE,
COPY_HEADER_MATCH,
} CopyHeaderChoice;
/* /*
* Represents where to save input processing errors. More values to be added * Represents where to save input processing errors. More values to be added
@ -64,7 +61,8 @@ typedef struct CopyFormatOptions
bool binary; /* binary format? */ bool binary; /* binary format? */
bool freeze; /* freeze rows on loading? */ bool freeze; /* freeze rows on loading? */
bool csv_mode; /* Comma Separated Value format? */ bool csv_mode; /* Comma Separated Value format? */
CopyHeaderChoice header_line; /* header line? */ int header_line; /* number of lines to skip or COPY_HEADER_XXX
* value (see above) */
char *null_print; /* NULL marker string (server encoding!) */ char *null_print; /* NULL marker string (server encoding!) */
int null_print_len; /* length of same */ int null_print_len; /* length of same */
char *null_print_client; /* same converted to file encoding */ char *null_print_client; /* same converted to file encoding */

View File

@ -81,6 +81,29 @@ copy copytest4 to stdout (header);
c1 colname with tab: \t c1 colname with tab: \t
1 a 1 a
2 b 2 b
-- test multi-line header line feature
create temp table copytest5 (c1 int);
copy copytest5 from stdin (format csv, header 2);
copy copytest5 to stdout (header);
c1
1
2
truncate copytest5;
copy copytest5 from stdin (format csv, header 4);
select count(*) from copytest5;
count
-------
0
(1 row)
truncate copytest5;
copy copytest5 from stdin (format csv, header 5);
select count(*) from copytest5;
count
-------
0
(1 row)
-- test copy from with a partitioned table -- test copy from with a partitioned table
create table parted_copytest ( create table parted_copytest (
a int, a int,
@ -224,7 +247,7 @@ alter table header_copytest add column c text;
copy header_copytest to stdout with (header match); copy header_copytest to stdout with (header match);
ERROR: cannot use "match" with HEADER in COPY TO ERROR: cannot use "match" with HEADER in COPY TO
copy header_copytest from stdin with (header wrong_choice); copy header_copytest from stdin with (header wrong_choice);
ERROR: header requires a Boolean value or "match" ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
-- works -- works
copy header_copytest from stdin with (header match); copy header_copytest from stdin with (header match);
copy header_copytest (c, a, b) from stdin with (header match); copy header_copytest (c, a, b) from stdin with (header match);

View File

@ -132,6 +132,12 @@ COPY x from stdin with (reject_limit 1);
ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
COPY x from stdin with (on_error ignore, reject_limit 0); COPY x from stdin with (on_error ignore, reject_limit 0);
ERROR: REJECT_LIMIT (0) must be greater than zero ERROR: REJECT_LIMIT (0) must be greater than zero
COPY x from stdin with (header -1);
ERROR: a negative integer value cannot be specified for header
COPY x from stdin with (header 2.5);
ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
COPY x to stdout with (header 2);
ERROR: cannot use multi-line header in COPY TO
-- too many columns in column list: should fail -- too many columns in column list: should fail
COPY x (a, b, c, d, e, d, c) from stdin; COPY x (a, b, c, d, e, d, c) from stdin;
ERROR: column "d" specified more than once ERROR: column "d" specified more than once

View File

@ -94,6 +94,36 @@ this is just a line full of junk that would error out if parsed
copy copytest4 to stdout (header); copy copytest4 to stdout (header);
-- test multi-line header line feature
create temp table copytest5 (c1 int);
copy copytest5 from stdin (format csv, header 2);
this is a first header line.
this is a second header line.
1
2
\.
copy copytest5 to stdout (header);
truncate copytest5;
copy copytest5 from stdin (format csv, header 4);
this is a first header line.
this is a second header line.
1
2
\.
select count(*) from copytest5;
truncate copytest5;
copy copytest5 from stdin (format csv, header 5);
this is a first header line.
this is a second header line.
1
2
\.
select count(*) from copytest5;
-- test copy from with a partitioned table -- test copy from with a partitioned table
create table parted_copytest ( create table parted_copytest (
a int, a int,

View File

@ -90,6 +90,9 @@ COPY x to stdout (format BINARY, on_error unsupported);
COPY x from stdin (log_verbosity unsupported); COPY x from stdin (log_verbosity unsupported);
COPY x from stdin with (reject_limit 1); COPY x from stdin with (reject_limit 1);
COPY x from stdin with (on_error ignore, reject_limit 0); COPY x from stdin with (on_error ignore, reject_limit 0);
COPY x from stdin with (header -1);
COPY x from stdin with (header 2.5);
COPY x to stdout with (header 2);
-- too many columns in column list: should fail -- too many columns in column list: should fail
COPY x (a, b, c, d, e, d, c) from stdin; COPY x (a, b, c, d, e, d, c) from stdin;

View File

@ -521,7 +521,6 @@ CopyFormatOptions
CopyFromRoutine CopyFromRoutine
CopyFromState CopyFromState
CopyFromStateData CopyFromStateData
CopyHeaderChoice
CopyInsertMethod CopyInsertMethod
CopyLogVerbosityChoice CopyLogVerbosityChoice
CopyMethod CopyMethod