1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-08 11:42:09 +03:00

Provide a FORCE NULL option to COPY in CSV mode.

This forces an input field containing the quoted null string to be
returned as a NULL. Without this option, only unquoted null strings
behave this way. This helps when a CSV producer insists on quoting
every field, whether or not it is needed. The option takes a list of
column names and applies only to those columns. There is an equivalent
column-level option added to file_fdw.

Ian Barwick, with some tweaking by Andrew Dunstan, reviewed by Payal
Singh.
This commit is contained in:
Andrew Dunstan
2014-03-04 17:31:59 -05:00
parent e2a0fc5363
commit 3b5e03dca2
10 changed files with 308 additions and 35 deletions

View File

@ -125,6 +125,8 @@ typedef struct CopyStateData
bool *force_quote_flags; /* per-column CSV FQ flags */
List *force_notnull; /* list of column names */
bool *force_notnull_flags; /* per-column CSV FNN flags */
List *force_null; /* list of column names */
bool *force_null_flags; /* per-column CSV FN flags */
bool convert_selectively; /* do selective binary conversion? */
List *convert_select; /* list of column names (can be NIL) */
bool *convert_select_flags; /* per-column CSV/TEXT CS flags */
@ -1019,6 +1021,20 @@ ProcessCopyOptions(CopyState cstate,
errmsg("argument to option \"%s\" must be a list of column names",
defel->defname)));
}
else if (strcmp(defel->defname, "force_null") == 0)
{
if (cstate->force_null)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
if (defel->arg && IsA(defel->arg, List))
cstate->force_null = (List *) defel->arg;
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("argument to option \"%s\" must be a list of column names",
defel->defname)));
}
else if (strcmp(defel->defname, "convert_selectively") == 0)
{
/*
@ -1178,6 +1194,17 @@ ProcessCopyOptions(CopyState cstate,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force not null only available using COPY FROM")));
/* Check force_null */
if (!cstate->csv_mode && cstate->force_null != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force null available only in CSV mode")));
if (cstate->force_null != NIL && !is_from)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force null only available using COPY FROM")));
/* Don't allow the delimiter to appear in the null string. */
if (strchr(cstate->null_print, cstate->delim[0]) != NULL)
ereport(ERROR,
@ -1385,6 +1412,28 @@ BeginCopy(bool is_from,
}
}
/* Convert FORCE NULL name list to per-column flags, check validity */
cstate->force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
if (cstate->force_null)
{
List *attnums;
ListCell *cur;
attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_null);
foreach(cur, attnums)
{
int attnum = lfirst_int(cur);
if (!list_member_int(cstate->attnumlist, attnum))
ereport(ERROR,
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
errmsg("FORCE NULL column \"%s\" not referenced by COPY",
NameStr(tupDesc->attrs[attnum - 1]->attname))));
cstate->force_null_flags[attnum - 1] = true;
}
}
/* Convert convert_selectively name list to per-column flags */
if (cstate->convert_selectively)
{
@ -2810,11 +2859,28 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
continue;
}
if (cstate->csv_mode && string == NULL &&
cstate->force_notnull_flags[m])
if (cstate->csv_mode)
{
/* Go ahead and read the NULL string */
string = cstate->null_print;
if(string == NULL &&
cstate->force_notnull_flags[m])
{
/*
* FORCE_NOT_NULL option is set and column is NULL -
* convert it to the NULL string.
*/
string = cstate->null_print;
}
else if(string != NULL && cstate->force_null_flags[m]
&& strcmp(string,cstate->null_print) == 0 )
{
/*
* FORCE_NULL option is set and column matches the NULL string.
* It must have been quoted, or otherwise the string would already
* have been set to NULL.
* Convert it to NULL as specified.
*/
string = NULL;
}
}
cstate->cur_attname = NameStr(attr[m]->attname);