From d0f60e4cc5f20bf64ee12d740e52db2773a93c21 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 21 Dec 2016 17:39:32 -0500
Subject: [PATCH] Fix detection of unfinished Unicode surrogate pair at end of
 string.

The U&'...' and U&"..." syntaxes silently discarded the first half of a
surrogate pair (that is, a code point between U+D800 and U+DBFF) if it
occurred at the very end of the string.  This seems like an obvious
oversight, since we throw an error for every other invalid combination
of surrogate characters, including the very same situation in E'...'
syntax.

This has been wrong since the pair processing was added (in 9.0),
so back-patch to all supported branches.

Discussion: https://postgr.es/m/19113.1482337898@sss.pgh.pa.us
---
 src/backend/parser/scan.l | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 36b21c764c8..30242d343d1 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -1395,7 +1395,15 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
 		}
 	}
 
+	/* unfinished surrogate pair? */
+	if (pair_first)
+	{
+		ADVANCE_YYLLOC(in - litbuf + 3);				/* 3 for U&" */
+		yyerror("invalid Unicode surrogate pair");
+	}
+
 	*out = '\0';
+
 	/*
 	 * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
 	 * codes; but it's probably not worth the trouble, since this isn't