diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 74dbb709fe7..be2e3d7354f 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -3,6 +3,12 @@
  * copyfrom.c
  *		COPY <table> FROM file/program/client
  *
+ * This file contains routines needed to efficiently load tuples into a
+ * table.  That includes looking up the correct partition, firing triggers,
+ * calling the table AM function to insert the data, and updating indexes.
+ * Reading data from the input file or client and parsing it into Datums
+ * is handled in copyfromparse.c.
+ *
  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
@@ -23,6 +29,7 @@
 #include "access/tableam.h"
 #include "access/xact.h"
 #include "access/xlog.h"
+#include "catalog/namespace.h"
 #include "commands/copy.h"
 #include "commands/copyfrom_internal.h"
 #include "commands/progress.h"
@@ -87,7 +94,7 @@ typedef struct CopyMultiInsertInfo
 	List	   *multiInsertBuffers; /* List of tracked CopyMultiInsertBuffers */
 	int			bufferedTuples; /* number of tuples buffered over all buffers */
 	int			bufferedBytes;	/* number of bytes from all buffered tuples */
-	CopyFromState	cstate;			/* Copy state for this CopyMultiInsertInfo */
+	CopyFromState cstate;		/* Copy state for this CopyMultiInsertInfo */
 	EState	   *estate;			/* Executor state used for COPY */
 	CommandId	mycid;			/* Command Id used for COPY */
 	int			ti_options;		/* table insert options */
@@ -107,7 +114,7 @@ static void ClosePipeFromProgram(CopyFromState cstate);
 void
 CopyFromErrorCallback(void *arg)
 {
-	CopyFromState	cstate = (CopyFromState) arg;
+	CopyFromState cstate = (CopyFromState) arg;
 	char		curlineno_str[32];
 
 	snprintf(curlineno_str, sizeof(curlineno_str), UINT64_FORMAT,
@@ -149,15 +156,9 @@ CopyFromErrorCallback(void *arg)
 			/*
 			 * Error is relevant to a particular line.
 			 *
-			 * If line_buf still contains the correct line, and it's already
-			 * transcoded, print it. If it's still in a foreign encoding, it's
-			 * quite likely that the error is precisely a failure to do
-			 * encoding conversion (ie, bad data). We dare not try to convert
-			 * it, and at present there's no way to regurgitate it without
-			 * conversion. So we have to punt and just report the line number.
+			 * If line_buf still contains the correct line, print it.
 			 */
-			if (cstate->line_buf_valid &&
-				(cstate->line_buf_converted || !cstate->need_transcoding))
+			if (cstate->line_buf_valid)
 			{
 				char	   *lineval;
 
@@ -300,7 +301,7 @@ CopyMultiInsertBufferFlush(CopyMultiInsertInfo *miinfo,
 	MemoryContext oldcontext;
 	int			i;
 	uint64		save_cur_lineno;
-	CopyFromState	cstate = miinfo->cstate;
+	CopyFromState cstate = miinfo->cstate;
 	EState	   *estate = miinfo->estate;
 	CommandId	mycid = miinfo->mycid;
 	int			ti_options = miinfo->ti_options;
@@ -1191,7 +1192,7 @@ BeginCopyFrom(ParseState *pstate,
 			  List *attnamelist,
 			  List *options)
 {
-	CopyFromState	cstate;
+	CopyFromState cstate;
 	bool		pipe = (filename == NULL);
 	TupleDesc	tupDesc;
 	AttrNumber	num_phys_attrs,
@@ -1229,7 +1230,7 @@ BeginCopyFrom(ParseState *pstate,
 	oldcontext = MemoryContextSwitchTo(cstate->copycontext);
 
 	/* Extract options from the statement node tree */
-	ProcessCopyOptions(pstate, &cstate->opts, true /* is_from */, options);
+	ProcessCopyOptions(pstate, &cstate->opts, true /* is_from */ , options);
 
 	/* Process the target relation */
 	cstate->rel = rel;
@@ -1320,15 +1321,20 @@ BeginCopyFrom(ParseState *pstate,
 		cstate->file_encoding = cstate->opts.file_encoding;
 
 	/*
-	 * Set up encoding conversion info.  Even if the file and server encodings
-	 * are the same, we must apply pg_any_to_server() to validate data in
-	 * multibyte encodings.
+	 * Look up encoding conversion function.
 	 */
-	cstate->need_transcoding =
-		(cstate->file_encoding != GetDatabaseEncoding() ||
-		 pg_database_encoding_max_length() > 1);
-	/* See Multibyte encoding comment above */
-	cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
+	if (cstate->file_encoding == GetDatabaseEncoding() ||
+		cstate->file_encoding == PG_SQL_ASCII ||
+		GetDatabaseEncoding() == PG_SQL_ASCII)
+	{
+		cstate->need_transcoding = false;
+	}
+	else
+	{
+		cstate->need_transcoding = true;
+		cstate->conversion_proc = FindDefaultConversionProc(cstate->file_encoding,
+															GetDatabaseEncoding());
+	}
 
 	cstate->copy_src = COPY_FILE;	/* default */
 
@@ -1339,7 +1345,6 @@ BeginCopyFrom(ParseState *pstate,
 	oldcontext = MemoryContextSwitchTo(cstate->copycontext);
 
 	/* Initialize state variables */
-	cstate->reached_eof = false;
 	cstate->eol_type = EOL_UNKNOWN;
 	cstate->cur_relname = RelationGetRelationName(cstate->rel);
 	cstate->cur_lineno = 0;
@@ -1347,19 +1352,36 @@ BeginCopyFrom(ParseState *pstate,
 	cstate->cur_attval = NULL;
 
 	/*
-	 * Set up variables to avoid per-attribute overhead.  attribute_buf and
-	 * raw_buf are used in both text and binary modes, but we use line_buf
-	 * only in text mode.
+	 * Allocate buffers for the input pipeline.
+	 *
+	 * attribute_buf and raw_buf are used in both text and binary modes, but
+	 * input_buf and line_buf only in text mode.
 	 */
-	initStringInfo(&cstate->attribute_buf);
-	cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
+	cstate->raw_buf = palloc(RAW_BUF_SIZE + 1);
 	cstate->raw_buf_index = cstate->raw_buf_len = 0;
+	cstate->raw_reached_eof = false;
+
 	if (!cstate->opts.binary)
 	{
+		/*
+		 * If encoding conversion is needed, we need another buffer to hold
+		 * the converted input data.  Otherwise, we can just point input_buf
+		 * to the same buffer as raw_buf.
+		 */
+		if (cstate->need_transcoding)
+		{
+			cstate->input_buf = (char *) palloc(INPUT_BUF_SIZE + 1);
+			cstate->input_buf_index = cstate->input_buf_len = 0;
+		}
+		else
+			cstate->input_buf = cstate->raw_buf;
+		cstate->input_reached_eof = false;
+
 		initStringInfo(&cstate->line_buf);
-		cstate->line_buf_converted = false;
 	}
 
+	initStringInfo(&cstate->attribute_buf);
+
 	/* Assign range table, we'll need it in CopyFrom. */
 	if (pstate)
 		cstate->range_table = pstate->p_rtable;
@@ -1584,7 +1606,7 @@ ClosePipeFromProgram(CopyFromState cstate)
 		 * should not report that as an error.  Otherwise, SIGPIPE indicates a
 		 * problem.
 		 */
-		if (!cstate->reached_eof &&
+		if (!cstate->raw_reached_eof &&
 			wait_result_is_signal(pclose_rc, SIGPIPE))
 			return;
 
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index ce24a1528bd..0813424768f 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -3,6 +3,50 @@
  * copyfromparse.c
  *		Parse CSV/text/binary format for COPY FROM.
  *
+ * This file contains routines to parse the text, CSV and binary input
+ * formats.  The main entry point is NextCopyFrom(), which parses the
+ * next input line and returns it as Datums.
+ *
+ * In text/CSV mode, the parsing happens in multiple stages:
+ *
+ * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
+ *                1.          2.            3.           4.
+ *
+ * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
+ *    places it into 'raw_buf'.
+ *
+ * 2. CopyConvertBuf() calls the encoding conversion function to convert
+ *    the data in 'raw_buf' from client to server encoding, placing the
+ *    converted result in 'input_buf'.
+ *
+ * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
+ *    It is responsible for finding the next newline marker, taking quote and
+ *    escape characters into account according to the COPY options.  The line
+ *    is copied into 'line_buf', with quotes and escape characters still
+ *    intact.
+ *
+ * 4. CopyReadAttributesText/CSV() function takes the input line from
+ *    'line_buf', and splits it into fields, unescaping the data as required.
+ *    The fields are stored in 'attribute_buf', and 'raw_fields' array holds
+ *    pointers to each field.
+ *
+ * If encoding conversion is not required, a shortcut is taken in step 2 to
+ * avoid copying the data unnecessarily.  The 'input_buf' pointer is set to
+ * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data
+ * directly into 'input_buf'.  CopyConvertBuf() then merely validates that
+ * the data is valid in the current encoding.
+ *
+ * In binary mode, the pipeline is much simpler.  Input is loaded into
+ * into 'raw_buf', and encoding conversion is done in the datatype-specific
+ * receive functions, if required.  'input_buf' and 'line_buf' are not used,
+ * but 'attribute_buf' is used as a temporary buffer to hold one attribute's
+ * data when it's passed the receive function.
+ *
+ * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE).  'input_buf' is also
+ * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required.  'line_buf'
+ * and 'attribute_buf' are expanded on demand, to hold the longest line
+ * encountered so far.
+ *
  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
@@ -35,7 +79,7 @@
 #define OCTVALUE(c) ((c) - '0')
 
 /*
- * These macros centralize code used to process line_buf and raw_buf buffers.
+ * These macros centralize code used to process line_buf and input_buf buffers.
  * They are macros because they often do continue/break control and to avoid
  * function call overhead in tight COPY loops.
  *
@@ -53,9 +97,9 @@
 #define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
 if (1) \
 { \
-	if (raw_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
+	if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
 	{ \
-		raw_buf_ptr = prev_raw_ptr; /* undo fetch */ \
+		input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
 		need_data = true; \
 		continue; \
 	} \
@@ -65,10 +109,10 @@ if (1) \
 #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
 if (1) \
 { \
-	if (raw_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
+	if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
 	{ \
 		if (extralen) \
-			raw_buf_ptr = copy_buf_len; /* consume the partial character */ \
+			input_buf_ptr = copy_buf_len; /* consume the partial character */ \
 		/* backslash just before EOF, treat as data char */ \
 		result = true; \
 		break; \
@@ -77,17 +121,17 @@ if (1) \
 
 /*
  * Transfer any approved data to line_buf; must do this to be sure
- * there is some room in raw_buf.
+ * there is some room in input_buf.
  */
 #define REFILL_LINEBUF \
 if (1) \
 { \
-	if (raw_buf_ptr > cstate->raw_buf_index) \
+	if (input_buf_ptr > cstate->input_buf_index) \
 	{ \
 		appendBinaryStringInfo(&cstate->line_buf, \
-							 cstate->raw_buf + cstate->raw_buf_index, \
-							   raw_buf_ptr - cstate->raw_buf_index); \
-		cstate->raw_buf_index = raw_buf_ptr; \
+							 cstate->input_buf + cstate->input_buf_index, \
+							   input_buf_ptr - cstate->input_buf_index); \
+		cstate->input_buf_index = input_buf_ptr; \
 	} \
 } else ((void) 0)
 
@@ -95,7 +139,7 @@ if (1) \
 #define NO_END_OF_COPY_GOTO \
 if (1) \
 { \
-	raw_buf_ptr = prev_raw_ptr + 1; \
+	input_buf_ptr = prev_raw_ptr + 1; \
 	goto not_end_of_copy; \
 } else ((void) 0)
 
@@ -118,7 +162,7 @@ static int	CopyGetData(CopyFromState cstate, void *databuf,
 						int minread, int maxread);
 static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
 static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
-static bool CopyLoadRawBuf(CopyFromState cstate);
+static void CopyLoadInputBuf(CopyFromState cstate);
 static int	CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);
 
 void
@@ -210,10 +254,10 @@ CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
 						(errcode_for_file_access(),
 						 errmsg("could not read from COPY file: %m")));
 			if (bytesread == 0)
-				cstate->reached_eof = true;
+				cstate->raw_reached_eof = true;
 			break;
 		case COPY_FRONTEND:
-			while (maxread > 0 && bytesread < minread && !cstate->reached_eof)
+			while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
 			{
 				int			avail;
 
@@ -241,7 +285,7 @@ CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
 							break;
 						case 'c':	/* CopyDone */
 							/* COPY IN correctly terminated by frontend */
-							cstate->reached_eof = true;
+							cstate->raw_reached_eof = true;
 							return bytesread;
 						case 'f':	/* CopyFail */
 							ereport(ERROR,
@@ -327,34 +371,303 @@ CopyGetInt16(CopyFromState cstate, int16 *val)
 
 
 /*
- * CopyLoadRawBuf loads some more data into raw_buf
+ * Perform encoding conversion on data in 'raw_buf', writing the converted
+ * data into 'input_buf'.
  *
- * Returns true if able to obtain at least one more byte, else false.
- *
- * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
- * of the buffer and then we load more data after that.  This case occurs only
- * when a multibyte character crosses a bufferload boundary.
+ * On entry, there must be some data to convert in 'raw_buf'.
  */
-static bool
+static void
+CopyConvertBuf(CopyFromState cstate)
+{
+	/*
+	 * If the file and server encoding are the same, no encoding conversion is
+	 * required.  However, we still need to verify that the input is valid for
+	 * the encoding.
+	 */
+	if (!cstate->need_transcoding)
+	{
+		/*
+		 * When conversion is not required, input_buf and raw_buf are the
+		 * same.  raw_buf_len is the total number of bytes in the buffer, and
+		 * input_buf_len tracks how many of those bytes have already been
+		 * verified.
+		 */
+		int			preverifiedlen = cstate->input_buf_len;
+		int			unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
+		int			nverified;
+
+		if (unverifiedlen == 0)
+		{
+			/*
+			 * If no more raw data is coming, report the EOF to the caller.
+			 */
+			if (cstate->raw_reached_eof)
+				cstate->input_reached_eof = true;
+			return;
+		}
+
+		/*
+		 * Verify the new data, including any residual unverified bytes from
+		 * previous round.
+		 */
+		nverified = pg_encoding_verifymbstr(cstate->file_encoding,
+											cstate->raw_buf + preverifiedlen,
+											unverifiedlen);
+		if (nverified == 0)
+		{
+			/*
+			 * Could not verify anything.
+			 *
+			 * If there is no more raw input data coming, it means that there
+			 * was an incomplete multi-byte sequence at the end.  Also, if
+			 * there's "enough" input left, we should be able to verify at
+			 * least one character, and a failure to do so means that we've
+			 * hit an invalid byte sequence.
+			 */
+			if (cstate->raw_reached_eof || unverifiedlen >= pg_database_encoding_max_length())
+				cstate->input_reached_error = true;
+			return;
+		}
+		cstate->input_buf_len += nverified;
+	}
+	else
+	{
+		/*
+		 * Encoding conversion is needed.
+		 */
+		int			nbytes;
+		unsigned char *src;
+		int			srclen;
+		unsigned char *dst;
+		int			dstlen;
+		int			convertedlen;
+
+		if (RAW_BUF_BYTES(cstate) == 0)
+		{
+			/*
+			 * If no more raw data is coming, report the EOF to the caller.
+			 */
+			if (cstate->raw_reached_eof)
+				cstate->input_reached_eof = true;
+			return;
+		}
+
+		/*
+		 * First, copy down any unprocessed data.
+		 */
+		nbytes = INPUT_BUF_BYTES(cstate);
+		if (nbytes > 0 && cstate->input_buf_index > 0)
+			memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
+					nbytes);
+		cstate->input_buf_index = 0;
+		cstate->input_buf_len = nbytes;
+		cstate->input_buf[nbytes] = '\0';
+
+		src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
+		srclen = cstate->raw_buf_len - cstate->raw_buf_index;
+		dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
+		dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
+
+		/*
+		 * Do the conversion.  This might stop short, if there is an invalid
+		 * byte sequence in the input.  We'll convert as much as we can in
+		 * that case.
+		 *
+		 * Note: Even if we hit an invalid byte sequence, we don't report the
+		 * error until all the valid bytes have been consumed.  The input
+		 * might contain an end-of-input marker (\.), and we don't want to
+		 * report an error if the invalid byte sequence is after the
+		 * end-of-input marker.  We might unnecessarily convert some data
+		 * after the end-of-input marker as long as it's valid for the
+		 * encoding, but that's harmless.
+		 */
+		convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
+													 cstate->file_encoding,
+													 GetDatabaseEncoding(),
+													 src, srclen,
+													 dst, dstlen,
+													 true);
+		if (convertedlen == 0)
+		{
+			/*
+			 * Could not convert anything.  If there is no more raw input data
+			 * coming, it means that there was an incomplete multi-byte
+			 * sequence at the end.  Also, if there is plenty of input left,
+			 * we should be able to convert at least one character, so a
+			 * failure to do so must mean that we've hit a byte sequence
+			 * that's invalid.
+			 */
+			if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
+				cstate->input_reached_error = true;
+			return;
+		}
+		cstate->raw_buf_index += convertedlen;
+		cstate->input_buf_len += strlen((char *) dst);
+	}
+}
+
+/*
+ * Report an encoding or conversion error.
+ */
+static void
+CopyConversionError(CopyFromState cstate)
+{
+	Assert(cstate->raw_buf_len > 0);
+	Assert(cstate->input_reached_error);
+
+	if (!cstate->need_transcoding)
+	{
+		/*
+		 * Everything up to input_buf_len was successfully verified, and
+		 * input_buf_len points to the invalid or incomplete character.
+		 */
+		report_invalid_encoding(cstate->file_encoding,
+								cstate->raw_buf + cstate->input_buf_len,
+								cstate->raw_buf_len - cstate->input_buf_len);
+	}
+	else
+	{
+		/*
+		 * raw_buf_index points to the invalid or untranslatable character. We
+		 * let the conversion routine report the error, because it can provide
+		 * a more specific error message than we could here.  An earlier call
+		 * to the conversion routine in CopyConvertBuf() detected that there
+		 * is an error, now we call the conversion routine again with
+		 * noError=false, to have it throw the error.
+		 */
+		unsigned char *src;
+		int			srclen;
+		unsigned char *dst;
+		int			dstlen;
+
+		src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
+		srclen = cstate->raw_buf_len - cstate->raw_buf_index;
+		dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
+		dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
+
+		(void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
+											 cstate->file_encoding,
+											 GetDatabaseEncoding(),
+											 src, srclen,
+											 dst, dstlen,
+											 false);
+
+		/*
+		 * The conversion routine should have reported an error, so this
+		 * should not be reached.
+		 */
+		elog(ERROR, "encoding conversion failed without error");
+	}
+}
+
+/*
+ * Load more data from data source to raw_buf.
+ *
+ * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
+ * beginning of the buffer, and we load new data after that.
+ */
+static void
 CopyLoadRawBuf(CopyFromState cstate)
 {
-	int			nbytes = RAW_BUF_BYTES(cstate);
+	int			nbytes;
 	int			inbytes;
 
-	/* Copy down the unprocessed data if any. */
-	if (nbytes > 0)
+	/*
+	 * In text mode, if encoding conversion is not required, raw_buf and
+	 * input_buf point to the same buffer.  Their len/index better agree, too.
+	 */
+	if (cstate->raw_buf == cstate->input_buf)
+	{
+		Assert(!cstate->need_transcoding);
+		Assert(cstate->raw_buf_index == cstate->input_buf_index);
+		Assert(cstate->input_buf_len <= cstate->raw_buf_len);
+	}
+
+	/*
+	 * Copy down the unprocessed data if any.
+	 */
+	nbytes = RAW_BUF_BYTES(cstate);
+	if (nbytes > 0 && cstate->raw_buf_index > 0)
 		memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
 				nbytes);
+	cstate->raw_buf_len -= cstate->raw_buf_index;
+	cstate->raw_buf_index = 0;
 
-	inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
-						  1, RAW_BUF_SIZE - nbytes);
+	/*
+	 * If raw_buf and input_buf are in fact the same buffer, adjust the
+	 * input_buf variables, too.
+	 */
+	if (cstate->raw_buf == cstate->input_buf)
+	{
+		cstate->input_buf_len -= cstate->input_buf_index;
+		cstate->input_buf_index = 0;
+	}
+
+	/* Load more data */
+	inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
+						  1, RAW_BUF_SIZE - cstate->raw_buf_len);
 	nbytes += inbytes;
 	cstate->raw_buf[nbytes] = '\0';
-	cstate->raw_buf_index = 0;
 	cstate->raw_buf_len = nbytes;
+
 	cstate->bytes_processed += inbytes;
 	pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
-	return (inbytes > 0);
+
+	if (inbytes == 0)
+		cstate->raw_reached_eof = true;
+}
+
+/*
+ * CopyLoadInputBuf loads some more data into input_buf
+ *
+ * On return, at least one more input character is loaded into
+ * input_buf, or input_reached_eof is set.
+ *
+ * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
+ * of the buffer and then we load more data after that.
+ */
+static void
+CopyLoadInputBuf(CopyFromState cstate)
+{
+	int			nbytes = INPUT_BUF_BYTES(cstate);
+
+	/*
+	 * The caller has updated input_buf_index to indicate how much of the
+	 * input has been consumed and isn't needed anymore.  If input_buf is the
+	 * same physical area as raw_buf, update raw_buf_index accordingly.
+	 */
+	if (cstate->raw_buf == cstate->input_buf)
+	{
+		Assert(!cstate->need_transcoding);
+		Assert(cstate->input_buf_index >= cstate->raw_buf_index);
+		cstate->raw_buf_index = cstate->input_buf_index;
+	}
+
+	for (;;)
+	{
+		/* If we now have some unconverted data, try to convert it */
+		CopyConvertBuf(cstate);
+
+		/* If we now have some more input bytes ready, return them */
+		if (INPUT_BUF_BYTES(cstate) > nbytes)
+			return;
+
+		/*
+		 * If we reached an invalid byte sequence, or we're at an incomplete
+		 * multi-byte character but there is no more raw input data, report
+		 * conversion error.
+		 */
+		if (cstate->input_reached_error)
+			CopyConversionError(cstate);
+
+		/* no more input, and everything has been converted */
+		if (cstate->input_reached_eof)
+			break;
+
+		/* Try to load more raw data */
+		Assert(!cstate->raw_reached_eof);
+		CopyLoadRawBuf(cstate);
+	}
 }
 
 /*
@@ -389,7 +702,8 @@ CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
 			/* Load more data if buffer is empty. */
 			if (RAW_BUF_BYTES(cstate) == 0)
 			{
-				if (!CopyLoadRawBuf(cstate))
+				CopyLoadRawBuf(cstate);
+				if (cstate->raw_reached_eof)
 					break;		/* EOF */
 			}
 
@@ -645,8 +959,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 }
 
 /*
- * Read the next input line and stash it in line_buf, with conversion to
- * server encoding.
+ * Read the next input line and stash it in line_buf.
  *
  * Result is true if read was terminated by EOF, false if terminated
  * by newline.  The terminating newline or EOF marker is not included
@@ -658,10 +971,7 @@ CopyReadLine(CopyFromState cstate)
 	bool		result;
 
 	resetStringInfo(&cstate->line_buf);
-	cstate->line_buf_valid = true;
-
-	/* Mark that encoding conversion hasn't occurred yet */
-	cstate->line_buf_converted = false;
+	cstate->line_buf_valid = false;
 
 	/* Parse data and transfer into line_buf */
 	result = CopyReadLineText(cstate);
@@ -675,10 +985,17 @@ CopyReadLine(CopyFromState cstate)
 		 */
 		if (cstate->copy_src == COPY_FRONTEND)
 		{
+			int			inbytes;
+
 			do
 			{
-				cstate->raw_buf_index = cstate->raw_buf_len;
-			} while (CopyLoadRawBuf(cstate));
+				inbytes = CopyGetData(cstate, cstate->input_buf,
+									  1, INPUT_BUF_SIZE);
+			} while (inbytes > 0);
+			cstate->input_buf_index = 0;
+			cstate->input_buf_len = 0;
+			cstate->raw_buf_index = 0;
+			cstate->raw_buf_len = 0;
 		}
 	}
 	else
@@ -715,25 +1032,8 @@ CopyReadLine(CopyFromState cstate)
 		}
 	}
 
-	/* Done reading the line.  Convert it to server encoding. */
-	if (cstate->need_transcoding)
-	{
-		char	   *cvt;
-
-		cvt = pg_any_to_server(cstate->line_buf.data,
-							   cstate->line_buf.len,
-							   cstate->file_encoding);
-		if (cvt != cstate->line_buf.data)
-		{
-			/* transfer converted data back to line_buf */
-			resetStringInfo(&cstate->line_buf);
-			appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
-			pfree(cvt);
-		}
-	}
-
 	/* Now it's safe to use the buffer in error messages */
-	cstate->line_buf_converted = true;
+	cstate->line_buf_valid = true;
 
 	return result;
 }
@@ -744,13 +1044,12 @@ CopyReadLine(CopyFromState cstate)
 static bool
 CopyReadLineText(CopyFromState cstate)
 {
-	char	   *copy_raw_buf;
-	int			raw_buf_ptr;
+	char	   *copy_input_buf;
+	int			input_buf_ptr;
 	int			copy_buf_len;
 	bool		need_data = false;
 	bool		hit_eof = false;
 	bool		result = false;
-	char		mblen_str[2];
 
 	/* CSV variables */
 	bool		first_char_in_line = true;
@@ -768,8 +1067,6 @@ CopyReadLineText(CopyFromState cstate)
 			escapec = '\0';
 	}
 
-	mblen_str[1] = '\0';
-
 	/*
 	 * The objective of this loop is to transfer the entire next input line
 	 * into line_buf.  Hence, we only care for detecting newlines (\r and/or
@@ -782,18 +1079,25 @@ CopyReadLineText(CopyFromState cstate)
 	 * These four characters, and the CSV escape and quote characters, are
 	 * assumed the same in frontend and backend encodings.
 	 *
-	 * For speed, we try to move data from raw_buf to line_buf in chunks
-	 * rather than one character at a time.  raw_buf_ptr points to the next
-	 * character to examine; any characters from raw_buf_index to raw_buf_ptr
-	 * have been determined to be part of the line, but not yet transferred to
-	 * line_buf.
+	 * The input has already been converted to the database encoding.  All
+	 * supported server encodings have the property that all bytes in a
+	 * multi-byte sequence have the high bit set, so a multibyte character
+	 * cannot contain any newline or escape characters embedded in the
+	 * multibyte sequence.  Therefore, we can process the input byte-by-byte,
+	 * regardless of the encoding.
 	 *
-	 * For a little extra speed within the loop, we copy raw_buf and
-	 * raw_buf_len into local variables.
+	 * For speed, we try to move data from input_buf to line_buf in chunks
+	 * rather than one character at a time.  input_buf_ptr points to the next
+	 * character to examine; any characters from input_buf_index to
+	 * input_buf_ptr have been determined to be part of the line, but not yet
+	 * transferred to line_buf.
+	 *
+	 * For a little extra speed within the loop, we copy input_buf and
+	 * input_buf_len into local variables.
 	 */
-	copy_raw_buf = cstate->raw_buf;
-	raw_buf_ptr = cstate->raw_buf_index;
-	copy_buf_len = cstate->raw_buf_len;
+	copy_input_buf = cstate->input_buf;
+	input_buf_ptr = cstate->input_buf_index;
+	copy_buf_len = cstate->input_buf_len;
 
 	for (;;)
 	{
@@ -810,24 +1114,21 @@ CopyReadLineText(CopyFromState cstate)
 		 * cstate->copy_src != COPY_OLD_FE, but it hardly seems worth it,
 		 * considering the size of the buffer.
 		 */
-		if (raw_buf_ptr >= copy_buf_len || need_data)
+		if (input_buf_ptr >= copy_buf_len || need_data)
 		{
 			REFILL_LINEBUF;
 
-			/*
-			 * Try to read some more data.  This will certainly reset
-			 * raw_buf_index to zero, and raw_buf_ptr must go with it.
-			 */
-			if (!CopyLoadRawBuf(cstate))
-				hit_eof = true;
-			raw_buf_ptr = 0;
-			copy_buf_len = cstate->raw_buf_len;
+			CopyLoadInputBuf(cstate);
+			/* update our local variables */
+			hit_eof = cstate->input_reached_eof;
+			input_buf_ptr = cstate->input_buf_index;
+			copy_buf_len = cstate->input_buf_len;
 
 			/*
 			 * If we are completely out of data, break out of the loop,
 			 * reporting EOF.
 			 */
-			if (copy_buf_len <= 0)
+			if (INPUT_BUF_BYTES(cstate) <= 0)
 			{
 				result = true;
 				break;
@@ -836,8 +1137,8 @@ CopyReadLineText(CopyFromState cstate)
 		}
 
 		/* OK to fetch a character */
-		prev_raw_ptr = raw_buf_ptr;
-		c = copy_raw_buf[raw_buf_ptr++];
+		prev_raw_ptr = input_buf_ptr;
+		c = copy_input_buf[input_buf_ptr++];
 
 		if (cstate->opts.csv_mode)
 		{
@@ -891,16 +1192,16 @@ CopyReadLineText(CopyFromState cstate)
 				 * If need more data, go back to loop top to load it.
 				 *
 				 * Note that if we are at EOF, c will wind up as '\0' because
-				 * of the guaranteed pad of raw_buf.
+				 * of the guaranteed pad of input_buf.
 				 */
 				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
 
 				/* get next char */
-				c = copy_raw_buf[raw_buf_ptr];
+				c = copy_input_buf[input_buf_ptr];
 
 				if (c == '\n')
 				{
-					raw_buf_ptr++;	/* eat newline */
+					input_buf_ptr++;	/* eat newline */
 					cstate->eol_type = EOL_CRNL;	/* in case not set yet */
 				}
 				else
@@ -967,14 +1268,14 @@ CopyReadLineText(CopyFromState cstate)
 			/* -----
 			 * get next character
 			 * Note: we do not change c so if it isn't \., we can fall
-			 * through and continue processing for file encoding.
+			 * through and continue processing.
 			 * -----
 			 */
-			c2 = copy_raw_buf[raw_buf_ptr];
+			c2 = copy_input_buf[input_buf_ptr];
 
 			if (c2 == '.')
 			{
-				raw_buf_ptr++;	/* consume the '.' */
+				input_buf_ptr++;	/* consume the '.' */
 
 				/*
 				 * Note: if we loop back for more data here, it does not
@@ -986,7 +1287,7 @@ CopyReadLineText(CopyFromState cstate)
 					/* Get the next character */
 					IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
 					/* if hit_eof, c2 will become '\0' */
-					c2 = copy_raw_buf[raw_buf_ptr++];
+					c2 = copy_input_buf[input_buf_ptr++];
 
 					if (c2 == '\n')
 					{
@@ -1011,7 +1312,7 @@ CopyReadLineText(CopyFromState cstate)
 				/* Get the next character */
 				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
 				/* if hit_eof, c2 will become '\0' */
-				c2 = copy_raw_buf[raw_buf_ptr++];
+				c2 = copy_input_buf[input_buf_ptr++];
 
 				if (c2 != '\r' && c2 != '\n')
 				{
@@ -1036,11 +1337,11 @@ CopyReadLineText(CopyFromState cstate)
 				 * Transfer only the data before the \. into line_buf, then
 				 * discard the data and the \. sequence.
 				 */
-				if (prev_raw_ptr > cstate->raw_buf_index)
+				if (prev_raw_ptr > cstate->input_buf_index)
 					appendBinaryStringInfo(&cstate->line_buf,
-										   cstate->raw_buf + cstate->raw_buf_index,
-										   prev_raw_ptr - cstate->raw_buf_index);
-				cstate->raw_buf_index = raw_buf_ptr;
+										   cstate->input_buf + cstate->input_buf_index,
+										   prev_raw_ptr - cstate->input_buf_index);
+				cstate->input_buf_index = input_buf_ptr;
 				result = true;	/* report EOF */
 				break;
 			}
@@ -1056,15 +1357,8 @@ CopyReadLineText(CopyFromState cstate)
 				 * backslashes are not special, so we want to process the
 				 * character after the backslash just like a normal character,
 				 * so we don't increment in those cases.
-				 *
-				 * Set 'c' to skip whole character correctly in multi-byte
-				 * encodings.  If we don't have the whole character in the
-				 * buffer yet, we might loop back to process it, after all,
-				 * but that's OK because multi-byte characters cannot have any
-				 * special meaning.
 				 */
-				raw_buf_ptr++;
-				c = c2;
+				input_buf_ptr++;
 			}
 		}
 
@@ -1075,30 +1369,6 @@ CopyReadLineText(CopyFromState cstate)
 		 * value, while in non-CSV mode, \. cannot be a data value.
 		 */
 not_end_of_copy:
-
-		/*
-		 * Process all bytes of a multi-byte character as a group.
-		 *
-		 * We only support multi-byte sequences where the first byte has the
-		 * high-bit set, so as an optimization we can avoid this block
-		 * entirely if it is not set.
-		 */
-		if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
-		{
-			int			mblen;
-
-			/*
-			 * It is enough to look at the first byte in all our encodings, to
-			 * get the length.  (GB18030 is a bit special, but still works for
-			 * our purposes; see comment in pg_gb18030_mblen())
-			 */
-			mblen_str[0] = c;
-			mblen = pg_encoding_mblen(cstate->file_encoding, mblen_str);
-
-			IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
-			IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
-			raw_buf_ptr += mblen - 1;
-		}
 		first_char_in_line = false;
 	}							/* end of outer loop */
 
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index 705f5b615be..858af7a717b 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -52,17 +52,6 @@ typedef enum CopyInsertMethod
 /*
  * This struct contains all the state variables used throughout a COPY FROM
  * operation.
- *
- * Multi-byte encodings: all supported client-side encodings encode multi-byte
- * characters by having the first byte's high bit set. Subsequent bytes of the
- * character can have the high bit not set. When scanning data in such an
- * encoding to look for a match to a single-byte (ie ASCII) character, we must
- * use the full pg_encoding_mblen() machinery to skip over multibyte
- * characters, else we might find a false match to a trailing byte. In
- * supported server encodings, there is no possibility of a false match, and
- * it's faster to make useless comparisons to trailing bytes than it is to
- * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
- * when we have to do it the hard way.
  */
 typedef struct CopyFromStateData
 {
@@ -70,13 +59,11 @@ typedef struct CopyFromStateData
 	CopySource	copy_src;		/* type of copy source */
 	FILE	   *copy_file;		/* used if copy_src == COPY_FILE */
 	StringInfo	fe_msgbuf;		/* used if copy_src == COPY_NEW_FE */
-	bool		reached_eof;	/* true if we read to end of copy data (not
-								 * all copy_src types maintain this) */
 
 	EolType		eol_type;		/* EOL type of input */
 	int			file_encoding;	/* file or remote side's character encoding */
 	bool		need_transcoding;	/* file encoding diff from server? */
-	bool		encoding_embeds_ascii;	/* ASCII can be non-first byte? */
+	Oid			conversion_proc;	/* encoding conversion function */
 
 	/* parameters from the COPY command */
 	Relation	rel;			/* relation to copy from */
@@ -131,31 +118,52 @@ typedef struct CopyFromStateData
 
 	/*
 	 * Similarly, line_buf holds the whole input line being processed. The
-	 * input cycle is first to read the whole line into line_buf, convert it
-	 * to server encoding there, and then extract the individual attribute
-	 * fields into attribute_buf.  line_buf is preserved unmodified so that we
-	 * can display it in error messages if appropriate.  (In binary mode,
-	 * line_buf is not used.)
+	 * input cycle is first to read the whole line into line_buf, and then
+	 * extract the individual attribute fields into attribute_buf.  line_buf
+	 * is preserved unmodified so that we can display it in error messages if
+	 * appropriate.  (In binary mode, line_buf is not used.)
 	 */
 	StringInfoData line_buf;
-	bool		line_buf_converted; /* converted to server encoding? */
 	bool		line_buf_valid; /* contains the row being processed? */
 
 	/*
-	 * Finally, raw_buf holds raw data read from the data source (file or
-	 * client connection).  In text mode, CopyReadLine parses this data
-	 * sufficiently to locate line boundaries, then transfers the data to
-	 * line_buf and converts it.  In binary mode, CopyReadBinaryData fetches
-	 * appropriate amounts of data from this buffer.  In both modes, we
-	 * guarantee that there is a \0 at raw_buf[raw_buf_len].
+	 * input_buf holds input data, already converted to database encoding.
+	 *
+	 * In text mode, CopyReadLine parses this data sufficiently to locate
+	 * line boundaries, then transfers the data to line_buf. We guarantee
+	 * that there is a \0 at input_buf[input_buf_len] at all times.  (In
+	 * binary mode, input_buf is not used.)
+	 *
+	 * If encoding conversion is not required, input_buf is not a separate
+	 * buffer but points directly to raw_buf.  In that case, input_buf_len
+	 * tracks the number of bytes that have been verified as valid in the
+	 * database encoding, and raw_buf_len is the total number of bytes
+	 * stored in the buffer.
+	 */
+#define INPUT_BUF_SIZE 65536	/* we palloc INPUT_BUF_SIZE+1 bytes */
+	char	   *input_buf;
+	int			input_buf_index;	/* next byte to process */
+	int			input_buf_len;		/* total # of bytes stored */
+	bool		input_reached_eof;	/* true if we reached EOF */
+	bool		input_reached_error; /* true if a conversion error happened */
+	/* Shorthand for number of unconsumed bytes available in input_buf */
+#define INPUT_BUF_BYTES(cstate) ((cstate)->input_buf_len - (cstate)->input_buf_index)
+
+	/*
+	 * raw_buf holds raw input data read from the data source (file or client
+	 * connection), not yet converted to the database encoding.  Like with
+	 * 'input_buf', we guarantee that there is a \0 at raw_buf[raw_buf_len].
 	 */
 #define RAW_BUF_SIZE 65536		/* we palloc RAW_BUF_SIZE+1 bytes */
 	char	   *raw_buf;
 	int			raw_buf_index;	/* next byte to process */
 	int			raw_buf_len;	/* total # of bytes stored */
-	uint64		bytes_processed;/* number of bytes processed so far */
+	bool		raw_reached_eof;	/* true if we reached EOF */
+
 	/* Shorthand for number of unconsumed bytes available in raw_buf */
 #define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index)
+
+	uint64		bytes_processed;	/* number of bytes processed so far */
 } CopyFromStateData;
 
 extern void ReceiveCopyBegin(CopyFromState cstate);
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index a9aaff9e6dc..0f31e683189 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -306,15 +306,33 @@ typedef enum pg_enc
 
 /*
  * When converting strings between different encodings, we assume that space
- * for converted result is 4-to-1 growth in the worst case. The rate for
+ * for converted result is 4-to-1 growth in the worst case.  The rate for
  * currently supported encoding pairs are within 3 (SJIS JIS X0201 half width
- * kanna -> UTF8 is the worst case).  So "4" should be enough for the moment.
+ * kana -> UTF8 is the worst case).  So "4" should be enough for the moment.
  *
  * Note that this is not the same as the maximum character width in any
  * particular encoding.
  */
 #define MAX_CONVERSION_GROWTH  4
 
+/*
+ * Maximum byte length of a string that's required in any encoding to convert
+ * at least one character to any other encoding.  In other words, if you feed
+ * MAX_CONVERSION_INPUT_LENGTH bytes to any encoding conversion function, it
+ * is guaranteed to be able to convert something without needing more input
+ * (assuming the input is valid).
+ *
+ * Currently, the maximum case is the conversion UTF8 -> SJIS JIS X0201 half
+ * width kana, where a pair of UTF-8 characters is converted into a single
+ * SHIFT_JIS_2004 character (the reverse of the worst case for
+ * MAX_CONVERSION_GROWTH).  It needs 6 bytes of input.  In theory, a
+ * user-defined conversion function might have more complicated cases, although
+ * for the reverse mapping you would probably also need to bump up
+ * MAX_CONVERSION_GROWTH.  But there is no need to be stingy here, so make it
+ * generous.
+ */
+#define MAX_CONVERSION_INPUT_LENGTH	16
+
 /*
  * Maximum byte length of the string equivalent to any one Unicode code point,
  * in any backend encoding.  The current value assumes that a 4-byte UTF-8