mirror of
https://github.com/postgres/postgres.git
synced 2025-07-03 20:02:46 +03:00
Support multi-line headers in COPY FROM command.
The COPY FROM command now accepts a non-negative integer for the HEADER option, allowing multiple header lines to be skipped. This is useful when the input contains multi-line headers that should be ignored during data import.

Author: Shinya Kato <shinya11.kato@gmail.com>
Co-authored-by: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: Yugo Nagata <nagata@sraoss.co.jp>
Discussion: https://postgr.es/m/CAOzEurRPxfzbxqeOPF_AGnAUOYf=Wk0we+1LQomPNUNtyZGBZw@mail.gmail.com
This commit is contained in:
@ -37,7 +37,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
|
|||||||
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
|
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
|
||||||
NULL '<replaceable class="parameter">null_string</replaceable>'
|
NULL '<replaceable class="parameter">null_string</replaceable>'
|
||||||
DEFAULT '<replaceable class="parameter">default_string</replaceable>'
|
DEFAULT '<replaceable class="parameter">default_string</replaceable>'
|
||||||
HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ]
|
HEADER [ <replaceable class="parameter">boolean</replaceable> | <replaceable class="parameter">integer</replaceable> | MATCH ]
|
||||||
QUOTE '<replaceable class="parameter">quote_character</replaceable>'
|
QUOTE '<replaceable class="parameter">quote_character</replaceable>'
|
||||||
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
|
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
|
||||||
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
|
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
|
||||||
@ -212,6 +212,15 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><replaceable class="parameter">integer</replaceable></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Specifies a non-negative integer value passed to the selected option.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><literal>FORMAT</literal></term>
|
<term><literal>FORMAT</literal></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
@ -303,16 +312,25 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
|
|||||||
<term><literal>HEADER</literal></term>
|
<term><literal>HEADER</literal></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Specifies that the file contains a header line with the names of each
|
On output, if this option is set to <literal>true</literal>
|
||||||
column in the file. On output, the first line contains the column
|
(or an equivalent Boolean value), the first line of the output will
|
||||||
names from the table. On input, the first line is discarded when this
|
contain the column names from the table.
|
||||||
option is set to <literal>true</literal> (or equivalent Boolean value).
|
Integer values <literal>0</literal> and <literal>1</literal> are
|
||||||
If this option is set to <literal>MATCH</literal>, the number and names
|
accepted as Boolean values, but other integers are not allowed for
|
||||||
of the columns in the header line must match the actual column names of
|
<command>COPY TO</command> commands.
|
||||||
the table, in order; otherwise an error is raised.
|
</para>
|
||||||
|
<para>
|
||||||
|
On input, if this option is set to <literal>true</literal>
|
||||||
|
(or an equivalent Boolean value), the first line of the input is
|
||||||
|
discarded. If set to a non-negative integer, that number of
|
||||||
|
lines are discarded. If set to <literal>MATCH</literal>, the first line
|
||||||
|
is discarded, and it must contain column names that exactly match the
|
||||||
|
table's columns, in both number and order; otherwise, an error is raised.
|
||||||
|
The <literal>MATCH</literal> value is only valid for
|
||||||
|
<command>COPY FROM</command> commands.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
This option is not allowed when using <literal>binary</literal> format.
|
This option is not allowed when using <literal>binary</literal> format.
|
||||||
The <literal>MATCH</literal> option is only valid for <command>COPY
|
|
||||||
FROM</command> commands.
|
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Extract a CopyHeaderChoice value from a DefElem. This is like
|
* Extract the CopyFormatOptions.header_line value from a DefElem.
|
||||||
* defGetBoolean() but also accepts the special value "match".
|
*
|
||||||
|
* Parses the HEADER option for COPY, which can be a boolean, a non-negative
|
||||||
|
* integer (number of lines to skip), or the special value "match".
|
||||||
*/
|
*/
|
||||||
static CopyHeaderChoice
|
static int
|
||||||
defGetCopyHeaderChoice(DefElem *def, bool is_from)
|
defGetCopyHeaderOption(DefElem *def, bool is_from)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* If no parameter value given, assume "true" is meant.
|
* If no parameter value given, assume "true" is meant.
|
||||||
@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
|
|||||||
return COPY_HEADER_TRUE;
|
return COPY_HEADER_TRUE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allow 0, 1, "true", "false", "on", "off", or "match".
|
* Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
|
||||||
|
* "match".
|
||||||
*/
|
*/
|
||||||
switch (nodeTag(def->arg))
|
switch (nodeTag(def->arg))
|
||||||
{
|
{
|
||||||
case T_Integer:
|
case T_Integer:
|
||||||
switch (intVal(def->arg))
|
|
||||||
{
|
{
|
||||||
case 0:
|
int ival = intVal(def->arg);
|
||||||
return COPY_HEADER_FALSE;
|
|
||||||
case 1:
|
if (ival < 0)
|
||||||
return COPY_HEADER_TRUE;
|
ereport(ERROR,
|
||||||
default:
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
/* otherwise, error out below */
|
errmsg("a negative integer value cannot be "
|
||||||
break;
|
"specified for %s", def->defname)));
|
||||||
|
|
||||||
|
if (!is_from && ival > 1)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
|
errmsg("cannot use multi-line header in COPY TO")));
|
||||||
|
|
||||||
|
return ival;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
|
|||||||
}
|
}
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||||
errmsg("%s requires a Boolean value or \"match\"",
|
errmsg("%s requires a Boolean value, a non-negative integer, "
|
||||||
|
"or the string \"match\"",
|
||||||
def->defname)));
|
def->defname)));
|
||||||
return COPY_HEADER_FALSE; /* keep compiler quiet */
|
return COPY_HEADER_FALSE; /* keep compiler quiet */
|
||||||
}
|
}
|
||||||
@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
|
|||||||
if (header_specified)
|
if (header_specified)
|
||||||
errorConflictingDefElem(defel, pstate);
|
errorConflictingDefElem(defel, pstate);
|
||||||
header_specified = true;
|
header_specified = true;
|
||||||
opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
|
opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
|
||||||
}
|
}
|
||||||
else if (strcmp(defel->defname, "quote") == 0)
|
else if (strcmp(defel->defname, "quote") == 0)
|
||||||
{
|
{
|
||||||
@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
|
|||||||
errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
|
errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
|
||||||
|
|
||||||
/* Check header */
|
/* Check header */
|
||||||
if (opts_out->binary && opts_out->header_line)
|
if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
|
/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
|
||||||
|
@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
|
|||||||
NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
|
NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
|
||||||
{
|
{
|
||||||
int fldct;
|
int fldct;
|
||||||
bool done;
|
bool done = false;
|
||||||
|
|
||||||
/* only available for text or csv input */
|
/* only available for text or csv input */
|
||||||
Assert(!cstate->opts.binary);
|
Assert(!cstate->opts.binary);
|
||||||
|
|
||||||
/* on input check that the header line is correct if needed */
|
/* on input check that the header line is correct if needed */
|
||||||
if (cstate->cur_lineno == 0 && cstate->opts.header_line)
|
if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
|
||||||
{
|
{
|
||||||
ListCell *cur;
|
ListCell *cur;
|
||||||
TupleDesc tupDesc;
|
TupleDesc tupDesc;
|
||||||
|
int lines_to_skip = cstate->opts.header_line;
|
||||||
|
|
||||||
|
/* If set to "match", one header line is skipped */
|
||||||
|
if (cstate->opts.header_line == COPY_HEADER_MATCH)
|
||||||
|
lines_to_skip = 1;
|
||||||
|
|
||||||
tupDesc = RelationGetDescr(cstate->rel);
|
tupDesc = RelationGetDescr(cstate->rel);
|
||||||
|
|
||||||
cstate->cur_lineno++;
|
for (int i = 0; i < lines_to_skip; i++)
|
||||||
done = CopyReadLine(cstate, is_csv);
|
{
|
||||||
|
cstate->cur_lineno++;
|
||||||
|
if ((done = CopyReadLine(cstate, is_csv)))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (cstate->opts.header_line == COPY_HEADER_MATCH)
|
if (cstate->opts.header_line == COPY_HEADER_MATCH)
|
||||||
{
|
{
|
||||||
|
@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
|
|||||||
cstate->file_encoding);
|
cstate->file_encoding);
|
||||||
|
|
||||||
/* if a header has been requested send the line */
|
/* if a header has been requested send the line */
|
||||||
if (cstate->opts.header_line)
|
if (cstate->opts.header_line == COPY_HEADER_TRUE)
|
||||||
{
|
{
|
||||||
ListCell *cur;
|
ListCell *cur;
|
||||||
bool hdr_delim = false;
|
bool hdr_delim = false;
|
||||||
|
@ -20,15 +20,12 @@
|
|||||||
#include "tcop/dest.h"
|
#include "tcop/dest.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Represents whether a header line should be present, and whether it must
|
* Represents whether a header line must match the actual names
|
||||||
* match the actual names (which implies "true").
|
* (which implies "true"), and whether it should be present.
|
||||||
*/
|
*/
|
||||||
typedef enum CopyHeaderChoice
|
#define COPY_HEADER_MATCH -1
|
||||||
{
|
#define COPY_HEADER_FALSE 0
|
||||||
COPY_HEADER_FALSE = 0,
|
#define COPY_HEADER_TRUE 1
|
||||||
COPY_HEADER_TRUE,
|
|
||||||
COPY_HEADER_MATCH,
|
|
||||||
} CopyHeaderChoice;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Represents where to save input processing errors. More values to be added
|
* Represents where to save input processing errors. More values to be added
|
||||||
@ -64,7 +61,8 @@ typedef struct CopyFormatOptions
|
|||||||
bool binary; /* binary format? */
|
bool binary; /* binary format? */
|
||||||
bool freeze; /* freeze rows on loading? */
|
bool freeze; /* freeze rows on loading? */
|
||||||
bool csv_mode; /* Comma Separated Value format? */
|
bool csv_mode; /* Comma Separated Value format? */
|
||||||
CopyHeaderChoice header_line; /* header line? */
|
int header_line; /* number of lines to skip or COPY_HEADER_XXX
|
||||||
|
* value (see the above) */
|
||||||
char *null_print; /* NULL marker string (server encoding!) */
|
char *null_print; /* NULL marker string (server encoding!) */
|
||||||
int null_print_len; /* length of same */
|
int null_print_len; /* length of same */
|
||||||
char *null_print_client; /* same converted to file encoding */
|
char *null_print_client; /* same converted to file encoding */
|
||||||
|
@ -81,6 +81,29 @@ copy copytest4 to stdout (header);
|
|||||||
c1 colname with tab: \t
|
c1 colname with tab: \t
|
||||||
1 a
|
1 a
|
||||||
2 b
|
2 b
|
||||||
|
-- test multi-line header line feature
|
||||||
|
create temp table copytest5 (c1 int);
|
||||||
|
copy copytest5 from stdin (format csv, header 2);
|
||||||
|
copy copytest5 to stdout (header);
|
||||||
|
c1
|
||||||
|
1
|
||||||
|
2
|
||||||
|
truncate copytest5;
|
||||||
|
copy copytest5 from stdin (format csv, header 4);
|
||||||
|
select count(*) from copytest5;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
0
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
truncate copytest5;
|
||||||
|
copy copytest5 from stdin (format csv, header 5);
|
||||||
|
select count(*) from copytest5;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
0
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- test copy from with a partitioned table
|
-- test copy from with a partitioned table
|
||||||
create table parted_copytest (
|
create table parted_copytest (
|
||||||
a int,
|
a int,
|
||||||
@ -224,7 +247,7 @@ alter table header_copytest add column c text;
|
|||||||
copy header_copytest to stdout with (header match);
|
copy header_copytest to stdout with (header match);
|
||||||
ERROR: cannot use "match" with HEADER in COPY TO
|
ERROR: cannot use "match" with HEADER in COPY TO
|
||||||
copy header_copytest from stdin with (header wrong_choice);
|
copy header_copytest from stdin with (header wrong_choice);
|
||||||
ERROR: header requires a Boolean value or "match"
|
ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
|
||||||
-- works
|
-- works
|
||||||
copy header_copytest from stdin with (header match);
|
copy header_copytest from stdin with (header match);
|
||||||
copy header_copytest (c, a, b) from stdin with (header match);
|
copy header_copytest (c, a, b) from stdin with (header match);
|
||||||
|
@ -132,6 +132,12 @@ COPY x from stdin with (reject_limit 1);
|
|||||||
ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
|
ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
|
||||||
COPY x from stdin with (on_error ignore, reject_limit 0);
|
COPY x from stdin with (on_error ignore, reject_limit 0);
|
||||||
ERROR: REJECT_LIMIT (0) must be greater than zero
|
ERROR: REJECT_LIMIT (0) must be greater than zero
|
||||||
|
COPY x from stdin with (header -1);
|
||||||
|
ERROR: a negative integer value cannot be specified for header
|
||||||
|
COPY x from stdin with (header 2.5);
|
||||||
|
ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
|
||||||
|
COPY x to stdout with (header 2);
|
||||||
|
ERROR: cannot use multi-line header in COPY TO
|
||||||
-- too many columns in column list: should fail
|
-- too many columns in column list: should fail
|
||||||
COPY x (a, b, c, d, e, d, c) from stdin;
|
COPY x (a, b, c, d, e, d, c) from stdin;
|
||||||
ERROR: column "d" specified more than once
|
ERROR: column "d" specified more than once
|
||||||
|
@ -94,6 +94,36 @@ this is just a line full of junk that would error out if parsed
|
|||||||
|
|
||||||
copy copytest4 to stdout (header);
|
copy copytest4 to stdout (header);
|
||||||
|
|
||||||
|
-- test multi-line header line feature
|
||||||
|
|
||||||
|
create temp table copytest5 (c1 int);
|
||||||
|
|
||||||
|
copy copytest5 from stdin (format csv, header 2);
|
||||||
|
this is a first header line.
|
||||||
|
this is a second header line.
|
||||||
|
1
|
||||||
|
2
|
||||||
|
\.
|
||||||
|
copy copytest5 to stdout (header);
|
||||||
|
|
||||||
|
truncate copytest5;
|
||||||
|
copy copytest5 from stdin (format csv, header 4);
|
||||||
|
this is a first header line.
|
||||||
|
this is a second header line.
|
||||||
|
1
|
||||||
|
2
|
||||||
|
\.
|
||||||
|
select count(*) from copytest5;
|
||||||
|
|
||||||
|
truncate copytest5;
|
||||||
|
copy copytest5 from stdin (format csv, header 5);
|
||||||
|
this is a first header line.
|
||||||
|
this is a second header line.
|
||||||
|
1
|
||||||
|
2
|
||||||
|
\.
|
||||||
|
select count(*) from copytest5;
|
||||||
|
|
||||||
-- test copy from with a partitioned table
|
-- test copy from with a partitioned table
|
||||||
create table parted_copytest (
|
create table parted_copytest (
|
||||||
a int,
|
a int,
|
||||||
|
@ -90,6 +90,9 @@ COPY x to stdout (format BINARY, on_error unsupported);
|
|||||||
COPY x from stdin (log_verbosity unsupported);
|
COPY x from stdin (log_verbosity unsupported);
|
||||||
COPY x from stdin with (reject_limit 1);
|
COPY x from stdin with (reject_limit 1);
|
||||||
COPY x from stdin with (on_error ignore, reject_limit 0);
|
COPY x from stdin with (on_error ignore, reject_limit 0);
|
||||||
|
COPY x from stdin with (header -1);
|
||||||
|
COPY x from stdin with (header 2.5);
|
||||||
|
COPY x to stdout with (header 2);
|
||||||
|
|
||||||
-- too many columns in column list: should fail
|
-- too many columns in column list: should fail
|
||||||
COPY x (a, b, c, d, e, d, c) from stdin;
|
COPY x (a, b, c, d, e, d, c) from stdin;
|
||||||
|
@ -521,7 +521,6 @@ CopyFormatOptions
|
|||||||
CopyFromRoutine
|
CopyFromRoutine
|
||||||
CopyFromState
|
CopyFromState
|
||||||
CopyFromStateData
|
CopyFromStateData
|
||||||
CopyHeaderChoice
|
|
||||||
CopyInsertMethod
|
CopyInsertMethod
|
||||||
CopyLogVerbosityChoice
|
CopyLogVerbosityChoice
|
||||||
CopyMethod
|
CopyMethod
|
||||||
|
Reference in New Issue
Block a user