mirror of
https://github.com/postgres/postgres.git
synced 2025-07-03 20:02:46 +03:00
Support multi-line headers in COPY FROM command.
The COPY FROM command now accepts a non-negative integer for the HEADER option, allowing multiple header lines to be skipped. This is useful when the input contains multi-line headers that should be ignored during data import. Author: Shinya Kato <shinya11.kato@gmail.com> Co-authored-by: Fujii Masao <masao.fujii@gmail.com> Reviewed-by: Yugo Nagata <nagata@sraoss.co.jp> Discussion: https://postgr.es/m/CAOzEurRPxfzbxqeOPF_AGnAUOYf=Wk0we+1LQomPNUNtyZGBZw@mail.gmail.com
This commit is contained in:
@ -37,7 +37,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
|
||||
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
|
||||
NULL '<replaceable class="parameter">null_string</replaceable>'
|
||||
DEFAULT '<replaceable class="parameter">default_string</replaceable>'
|
||||
HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ]
|
||||
HEADER [ <replaceable class="parameter">boolean</replaceable> | <replaceable class="parameter">integer</replaceable> | MATCH ]
|
||||
QUOTE '<replaceable class="parameter">quote_character</replaceable>'
|
||||
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
|
||||
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
|
||||
@ -212,6 +212,15 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><replaceable class="parameter">integer</replaceable></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Specifies a non-negative integer value passed to the selected option.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><literal>FORMAT</literal></term>
|
||||
<listitem>
|
||||
@ -303,16 +312,25 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
|
||||
<term><literal>HEADER</literal></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Specifies that the file contains a header line with the names of each
|
||||
column in the file. On output, the first line contains the column
|
||||
names from the table. On input, the first line is discarded when this
|
||||
option is set to <literal>true</literal> (or equivalent Boolean value).
|
||||
If this option is set to <literal>MATCH</literal>, the number and names
|
||||
of the columns in the header line must match the actual column names of
|
||||
the table, in order; otherwise an error is raised.
|
||||
On output, if this option is set to <literal>true</literal>
|
||||
(or an equivalent Boolean value), the first line of the output will
|
||||
contain the column names from the table.
|
||||
Integer values <literal>0</literal> and <literal>1</literal> are
|
||||
accepted as Boolean values, but other integers are not allowed for
|
||||
<command>COPY TO</command> commands.
|
||||
</para>
|
||||
<para>
|
||||
On input, if this option is set to <literal>true</literal>
|
||||
(or an equivalent Boolean value), the first line of the input is
|
||||
discarded. If set to a non-negative integer, that number of
|
||||
lines are discarded. If set to <literal>MATCH</literal>, the first line
|
||||
is discarded, and it must contain column names that exactly match the
|
||||
table's columns, in both number and order; otherwise, an error is raised.
|
||||
The <literal>MATCH</literal> value is only valid for
|
||||
<command>COPY FROM</command> commands.
|
||||
</para>
|
||||
<para>
|
||||
This option is not allowed when using <literal>binary</literal> format.
|
||||
The <literal>MATCH</literal> option is only valid for <command>COPY
|
||||
FROM</command> commands.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract a CopyHeaderChoice value from a DefElem. This is like
|
||||
* defGetBoolean() but also accepts the special value "match".
|
||||
* Extract the CopyFormatOptions.header_line value from a DefElem.
|
||||
*
|
||||
* Parses the HEADER option for COPY, which can be a boolean, a non-negative
|
||||
* integer (number of lines to skip), or the special value "match".
|
||||
*/
|
||||
static CopyHeaderChoice
|
||||
defGetCopyHeaderChoice(DefElem *def, bool is_from)
|
||||
static int
|
||||
defGetCopyHeaderOption(DefElem *def, bool is_from)
|
||||
{
|
||||
/*
|
||||
* If no parameter value given, assume "true" is meant.
|
||||
@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
|
||||
return COPY_HEADER_TRUE;
|
||||
|
||||
/*
|
||||
* Allow 0, 1, "true", "false", "on", "off", or "match".
|
||||
* Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
|
||||
* "match".
|
||||
*/
|
||||
switch (nodeTag(def->arg))
|
||||
{
|
||||
case T_Integer:
|
||||
switch (intVal(def->arg))
|
||||
{
|
||||
case 0:
|
||||
return COPY_HEADER_FALSE;
|
||||
case 1:
|
||||
return COPY_HEADER_TRUE;
|
||||
default:
|
||||
/* otherwise, error out below */
|
||||
break;
|
||||
int ival = intVal(def->arg);
|
||||
|
||||
if (ival < 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("a negative integer value cannot be "
|
||||
"specified for %s", def->defname)));
|
||||
|
||||
if (!is_from && ival > 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("cannot use multi-line header in COPY TO")));
|
||||
|
||||
return ival;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
|
||||
}
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("%s requires a Boolean value or \"match\"",
|
||||
errmsg("%s requires a Boolean value, a non-negative integer, "
|
||||
"or the string \"match\"",
|
||||
def->defname)));
|
||||
return COPY_HEADER_FALSE; /* keep compiler quiet */
|
||||
}
|
||||
@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
|
||||
if (header_specified)
|
||||
errorConflictingDefElem(defel, pstate);
|
||||
header_specified = true;
|
||||
opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
|
||||
opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
|
||||
}
|
||||
else if (strcmp(defel->defname, "quote") == 0)
|
||||
{
|
||||
@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
|
||||
errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
|
||||
|
||||
/* Check header */
|
||||
if (opts_out->binary && opts_out->header_line)
|
||||
if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
|
||||
|
@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
|
||||
NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
|
||||
{
|
||||
int fldct;
|
||||
bool done;
|
||||
bool done = false;
|
||||
|
||||
/* only available for text or csv input */
|
||||
Assert(!cstate->opts.binary);
|
||||
|
||||
/* on input check that the header line is correct if needed */
|
||||
if (cstate->cur_lineno == 0 && cstate->opts.header_line)
|
||||
if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
|
||||
{
|
||||
ListCell *cur;
|
||||
TupleDesc tupDesc;
|
||||
int lines_to_skip = cstate->opts.header_line;
|
||||
|
||||
/* If set to "match", one header line is skipped */
|
||||
if (cstate->opts.header_line == COPY_HEADER_MATCH)
|
||||
lines_to_skip = 1;
|
||||
|
||||
tupDesc = RelationGetDescr(cstate->rel);
|
||||
|
||||
for (int i = 0; i < lines_to_skip; i++)
|
||||
{
|
||||
cstate->cur_lineno++;
|
||||
done = CopyReadLine(cstate, is_csv);
|
||||
if ((done = CopyReadLine(cstate, is_csv)))
|
||||
break;
|
||||
}
|
||||
|
||||
if (cstate->opts.header_line == COPY_HEADER_MATCH)
|
||||
{
|
||||
|
@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
|
||||
cstate->file_encoding);
|
||||
|
||||
/* if a header has been requested send the line */
|
||||
if (cstate->opts.header_line)
|
||||
if (cstate->opts.header_line == COPY_HEADER_TRUE)
|
||||
{
|
||||
ListCell *cur;
|
||||
bool hdr_delim = false;
|
||||
|
@ -20,15 +20,12 @@
|
||||
#include "tcop/dest.h"
|
||||
|
||||
/*
|
||||
* Represents whether a header line should be present, and whether it must
|
||||
* match the actual names (which implies "true").
|
||||
* Represents whether a header line must match the actual names
|
||||
* (which implies "true"), and whether it should be present.
|
||||
*/
|
||||
typedef enum CopyHeaderChoice
|
||||
{
|
||||
COPY_HEADER_FALSE = 0,
|
||||
COPY_HEADER_TRUE,
|
||||
COPY_HEADER_MATCH,
|
||||
} CopyHeaderChoice;
|
||||
#define COPY_HEADER_MATCH -1
|
||||
#define COPY_HEADER_FALSE 0
|
||||
#define COPY_HEADER_TRUE 1
|
||||
|
||||
/*
|
||||
* Represents where to save input processing errors. More values to be added
|
||||
@ -64,7 +61,8 @@ typedef struct CopyFormatOptions
|
||||
bool binary; /* binary format? */
|
||||
bool freeze; /* freeze rows on loading? */
|
||||
bool csv_mode; /* Comma Separated Value format? */
|
||||
CopyHeaderChoice header_line; /* header line? */
|
||||
int header_line; /* number of lines to skip or COPY_HEADER_XXX
|
||||
* value (see the above) */
|
||||
char *null_print; /* NULL marker string (server encoding!) */
|
||||
int null_print_len; /* length of same */
|
||||
char *null_print_client; /* same converted to file encoding */
|
||||
|
@ -81,6 +81,29 @@ copy copytest4 to stdout (header);
|
||||
c1 colname with tab: \t
|
||||
1 a
|
||||
2 b
|
||||
-- test multi-line header line feature
|
||||
create temp table copytest5 (c1 int);
|
||||
copy copytest5 from stdin (format csv, header 2);
|
||||
copy copytest5 to stdout (header);
|
||||
c1
|
||||
1
|
||||
2
|
||||
truncate copytest5;
|
||||
copy copytest5 from stdin (format csv, header 4);
|
||||
select count(*) from copytest5;
|
||||
count
|
||||
-------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
truncate copytest5;
|
||||
copy copytest5 from stdin (format csv, header 5);
|
||||
select count(*) from copytest5;
|
||||
count
|
||||
-------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
-- test copy from with a partitioned table
|
||||
create table parted_copytest (
|
||||
a int,
|
||||
@ -224,7 +247,7 @@ alter table header_copytest add column c text;
|
||||
copy header_copytest to stdout with (header match);
|
||||
ERROR: cannot use "match" with HEADER in COPY TO
|
||||
copy header_copytest from stdin with (header wrong_choice);
|
||||
ERROR: header requires a Boolean value or "match"
|
||||
ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
|
||||
-- works
|
||||
copy header_copytest from stdin with (header match);
|
||||
copy header_copytest (c, a, b) from stdin with (header match);
|
||||
|
@ -132,6 +132,12 @@ COPY x from stdin with (reject_limit 1);
|
||||
ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
|
||||
COPY x from stdin with (on_error ignore, reject_limit 0);
|
||||
ERROR: REJECT_LIMIT (0) must be greater than zero
|
||||
COPY x from stdin with (header -1);
|
||||
ERROR: a negative integer value cannot be specified for header
|
||||
COPY x from stdin with (header 2.5);
|
||||
ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
|
||||
COPY x to stdout with (header 2);
|
||||
ERROR: cannot use multi-line header in COPY TO
|
||||
-- too many columns in column list: should fail
|
||||
COPY x (a, b, c, d, e, d, c) from stdin;
|
||||
ERROR: column "d" specified more than once
|
||||
|
@ -94,6 +94,36 @@ this is just a line full of junk that would error out if parsed
|
||||
|
||||
copy copytest4 to stdout (header);
|
||||
|
||||
-- test multi-line header line feature
|
||||
|
||||
create temp table copytest5 (c1 int);
|
||||
|
||||
copy copytest5 from stdin (format csv, header 2);
|
||||
this is a first header line.
|
||||
this is a second header line.
|
||||
1
|
||||
2
|
||||
\.
|
||||
copy copytest5 to stdout (header);
|
||||
|
||||
truncate copytest5;
|
||||
copy copytest5 from stdin (format csv, header 4);
|
||||
this is a first header line.
|
||||
this is a second header line.
|
||||
1
|
||||
2
|
||||
\.
|
||||
select count(*) from copytest5;
|
||||
|
||||
truncate copytest5;
|
||||
copy copytest5 from stdin (format csv, header 5);
|
||||
this is a first header line.
|
||||
this is a second header line.
|
||||
1
|
||||
2
|
||||
\.
|
||||
select count(*) from copytest5;
|
||||
|
||||
-- test copy from with a partitioned table
|
||||
create table parted_copytest (
|
||||
a int,
|
||||
|
@ -90,6 +90,9 @@ COPY x to stdout (format BINARY, on_error unsupported);
|
||||
COPY x from stdin (log_verbosity unsupported);
|
||||
COPY x from stdin with (reject_limit 1);
|
||||
COPY x from stdin with (on_error ignore, reject_limit 0);
|
||||
COPY x from stdin with (header -1);
|
||||
COPY x from stdin with (header 2.5);
|
||||
COPY x to stdout with (header 2);
|
||||
|
||||
-- too many columns in column list: should fail
|
||||
COPY x (a, b, c, d, e, d, c) from stdin;
|
||||
|
@ -521,7 +521,6 @@ CopyFormatOptions
|
||||
CopyFromRoutine
|
||||
CopyFromState
|
||||
CopyFromStateData
|
||||
CopyHeaderChoice
|
||||
CopyInsertMethod
|
||||
CopyLogVerbosityChoice
|
||||
CopyMethod
|
||||
|
Reference in New Issue
Block a user