mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Add header matching mode to COPY FROM
COPY FROM supports the HEADER option to silently discard the header line from a CSV or text file. It is possible to load by mistake a file that matches the expected format, for example, if two text columns have been swapped, resulting in garbage in the database. This adds a new option value HEADER MATCH that checks the column names in the header line against the actual column names and errors out if they do not match. Author: Rémi Lapeyre <remi.lapeyre@lenstra.fr> Reviewed-by: Daniel Verite <daniel@manitou-mail.org> Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com> Discussion: https://www.postgresql.org/message-id/flat/CAF1-J-0PtCWMeLtswwGV2M70U26n4g33gpe1rcKQqe6wVQDrFA@mail.gmail.com
This commit is contained in:
		@@ -313,6 +313,64 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 | 
			
		||||
		table_close(rel, NoLock);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Extract a CopyHeaderChoice value from a DefElem.  This is like
 | 
			
		||||
 * defGetBoolean() but also accepts the special value "match".
 | 
			
		||||
 */
 | 
			
		||||
static CopyHeaderChoice
 | 
			
		||||
defGetCopyHeaderChoice(DefElem *def)
 | 
			
		||||
{
 | 
			
		||||
	/*
 | 
			
		||||
	 * If no parameter given, assume "true" is meant.
 | 
			
		||||
	 */
 | 
			
		||||
	if (def->arg == NULL)
 | 
			
		||||
		return COPY_HEADER_TRUE;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Allow 0, 1, "true", "false", "on", "off", or "match".
 | 
			
		||||
	 */
 | 
			
		||||
	switch (nodeTag(def->arg))
 | 
			
		||||
	{
 | 
			
		||||
		case T_Integer:
 | 
			
		||||
			switch (intVal(def->arg))
 | 
			
		||||
			{
 | 
			
		||||
				case 0:
 | 
			
		||||
					return COPY_HEADER_FALSE;
 | 
			
		||||
				case 1:
 | 
			
		||||
					return COPY_HEADER_TRUE;
 | 
			
		||||
				default:
 | 
			
		||||
					/* otherwise, error out below */
 | 
			
		||||
					break;
 | 
			
		||||
			}
 | 
			
		||||
			break;
 | 
			
		||||
		default:
 | 
			
		||||
			{
 | 
			
		||||
				char	*sval = defGetString(def);
 | 
			
		||||
 | 
			
		||||
				/*
 | 
			
		||||
				 * The set of strings accepted here should match up with the
 | 
			
		||||
				 * grammar's opt_boolean_or_string production.
 | 
			
		||||
				 */
 | 
			
		||||
				if (pg_strcasecmp(sval, "true") == 0)
 | 
			
		||||
					return COPY_HEADER_TRUE;
 | 
			
		||||
				if (pg_strcasecmp(sval, "false") == 0)
 | 
			
		||||
					return COPY_HEADER_FALSE;
 | 
			
		||||
				if (pg_strcasecmp(sval, "on") == 0)
 | 
			
		||||
					return COPY_HEADER_TRUE;
 | 
			
		||||
				if (pg_strcasecmp(sval, "off") == 0)
 | 
			
		||||
					return COPY_HEADER_FALSE;
 | 
			
		||||
				if (pg_strcasecmp(sval, "match") == 0)
 | 
			
		||||
					return COPY_HEADER_MATCH;
 | 
			
		||||
			}
 | 
			
		||||
			break;
 | 
			
		||||
	}
 | 
			
		||||
	ereport(ERROR,
 | 
			
		||||
				(errcode(ERRCODE_SYNTAX_ERROR),
 | 
			
		||||
				 errmsg("%s requires a Boolean value or \"match\"",
 | 
			
		||||
					def->defname)));
 | 
			
		||||
	return COPY_HEADER_FALSE;	/* keep compiler quiet */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Process the statement option list for COPY.
 | 
			
		||||
 *
 | 
			
		||||
@@ -394,7 +452,7 @@ ProcessCopyOptions(ParseState *pstate,
 | 
			
		||||
			if (header_specified)
 | 
			
		||||
				errorConflictingDefElem(defel, pstate);
 | 
			
		||||
			header_specified = true;
 | 
			
		||||
			opts_out->header_line = defGetBoolean(defel);
 | 
			
		||||
			opts_out->header_line = defGetCopyHeaderChoice(defel);
 | 
			
		||||
		}
 | 
			
		||||
		else if (strcmp(defel->defname, "quote") == 0)
 | 
			
		||||
		{
 | 
			
		||||
 
 | 
			
		||||
@@ -72,6 +72,7 @@
 | 
			
		||||
#include "miscadmin.h"
 | 
			
		||||
#include "pgstat.h"
 | 
			
		||||
#include "port/pg_bswap.h"
 | 
			
		||||
#include "utils/builtins.h"
 | 
			
		||||
#include "utils/memutils.h"
 | 
			
		||||
#include "utils/rel.h"
 | 
			
		||||
 | 
			
		||||
@@ -758,12 +759,58 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
 | 
			
		||||
	/* only available for text or csv input */
 | 
			
		||||
	Assert(!cstate->opts.binary);
 | 
			
		||||
 | 
			
		||||
	/* on input just throw the header line away */
 | 
			
		||||
	/* on input check that the header line is correct if needed */
 | 
			
		||||
	if (cstate->cur_lineno == 0 && cstate->opts.header_line)
 | 
			
		||||
	{
 | 
			
		||||
		ListCell   *cur;
 | 
			
		||||
		TupleDesc	tupDesc;
 | 
			
		||||
 | 
			
		||||
		tupDesc = RelationGetDescr(cstate->rel);
 | 
			
		||||
 | 
			
		||||
		cstate->cur_lineno++;
 | 
			
		||||
		if (CopyReadLine(cstate))
 | 
			
		||||
			return false;		/* done */
 | 
			
		||||
		done = CopyReadLine(cstate);
 | 
			
		||||
 | 
			
		||||
		if (cstate->opts.header_line == COPY_HEADER_MATCH)
 | 
			
		||||
		{
 | 
			
		||||
			int			fldnum;
 | 
			
		||||
 | 
			
		||||
			if (cstate->opts.csv_mode)
 | 
			
		||||
				fldct = CopyReadAttributesCSV(cstate);
 | 
			
		||||
			else
 | 
			
		||||
				fldct = CopyReadAttributesText(cstate);
 | 
			
		||||
 | 
			
		||||
			if (fldct != list_length(cstate->attnumlist))
 | 
			
		||||
				ereport(ERROR,
 | 
			
		||||
						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
 | 
			
		||||
						 errmsg("wrong number of fields in header line: field count is %d, expected %d",
 | 
			
		||||
								fldct, list_length(cstate->attnumlist))));
 | 
			
		||||
 | 
			
		||||
			fldnum = 0;
 | 
			
		||||
			foreach(cur, cstate->attnumlist)
 | 
			
		||||
			{
 | 
			
		||||
				int			attnum = lfirst_int(cur);
 | 
			
		||||
				char	   *colName = cstate->raw_fields[attnum - 1];
 | 
			
		||||
				Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
 | 
			
		||||
 | 
			
		||||
				fldnum++;
 | 
			
		||||
 | 
			
		||||
				if (colName == NULL)
 | 
			
		||||
					ereport(ERROR,
 | 
			
		||||
							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
 | 
			
		||||
							 errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
 | 
			
		||||
									fldnum, cstate->opts.null_print, NameStr(attr->attname))));
 | 
			
		||||
 | 
			
		||||
				if (namestrcmp(&attr->attname, colName) != 0) {
 | 
			
		||||
					ereport(ERROR,
 | 
			
		||||
							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
 | 
			
		||||
							 errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
 | 
			
		||||
									fldnum, colName, NameStr(attr->attname))));
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (done)
 | 
			
		||||
			return false;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	cstate->cur_lineno++;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user