diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index d3c7c356d9f..4325e4d0eda 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.689 2009/11/09 02:36:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.690 2009/11/09 18:38:48 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -75,12 +75,6 @@
 			(Current) = (Rhs)[0]; \
 	} while (0)
 
-/*
- * The %name-prefix option below will make bison call base_yylex, but we
- * really want it to call filtered_base_yylex (see parser.c).
- */
-#define base_yylex filtered_base_yylex
-
 /*
  * Bison doesn't allocate anything that needs to live across parser calls,
  * so we can easily have it use palloc instead of malloc.  This prevents
@@ -104,10 +98,10 @@ typedef struct PrivTarget
 #define parser_yyerror(msg)  scanner_yyerror(msg, yyscanner)
 #define parser_errposition(pos)  scanner_errposition(pos, yyscanner)
 
-static void base_yyerror(YYLTYPE *yylloc, base_yyscan_t yyscanner,
+static void base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner,
 						 const char *msg);
 static Node *makeColumnRef(char *colname, List *indirection,
-						   int location, base_yyscan_t yyscanner);
+						   int location, core_yyscan_t yyscanner);
 static Node *makeTypeCast(Node *arg, TypeName *typename, int location);
 static Node *makeStringConst(char *str, int location);
 static Node *makeStringConstCast(char *str, int location, TypeName *typename);
@@ -118,17 +112,17 @@ static Node *makeNullAConst(int location);
 static Node *makeAConst(Value *v, int location);
 static Node *makeBoolAConst(bool state, int location);
 static FuncCall *makeOverlaps(List *largs, List *rargs,
-							  int location, base_yyscan_t yyscanner);
-static void check_qualified_name(List *names, base_yyscan_t yyscanner);
-static List *check_func_name(List *names, base_yyscan_t yyscanner);
-static List *check_indirection(List *indirection, base_yyscan_t yyscanner);
+							  int location, core_yyscan_t yyscanner);
+static void check_qualified_name(List *names, core_yyscan_t yyscanner);
+static List *check_func_name(List *names, core_yyscan_t yyscanner);
+static List *check_indirection(List *indirection, core_yyscan_t yyscanner);
 static List *extractArgTypes(List *parameters);
 static SelectStmt *findLeftmostSelect(SelectStmt *node);
 static void insertSelectOptions(SelectStmt *stmt,
 								List *sortClause, List *lockingClause,
 								Node *limitOffset, Node *limitCount,
 								WithClause *withClause,
-								base_yyscan_t yyscanner);
+								core_yyscan_t yyscanner);
 static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg);
 static Node *doNegate(Node *n, int location);
 static void doNegateFloat(Value *v);
@@ -145,15 +139,18 @@ static TypeName *TableFuncTypeName(List *columns);
 %name-prefix="base_yy"
 %locations
 
-%parse-param {base_yyscan_t yyscanner}
-%lex-param   {base_yyscan_t yyscanner}
+%parse-param {core_yyscan_t yyscanner}
+%lex-param   {core_yyscan_t yyscanner}
 
 %union
 {
+	core_YYSTYPE		core_yystype;
+	/* these fields must match core_YYSTYPE: */
 	int					ival;
-	char				chr;
 	char				*str;
 	const char			*keyword;
+
+	char				chr;
 	bool				boolean;
 	JoinType			jtype;
 	DropBehavior		dbehavior;
@@ -162,7 +159,6 @@ static TypeName *TableFuncTypeName(List *columns);
 	Node				*node;
 	Value				*value;
 	ObjectType			objtype;
-
 	TypeName			*typnam;
 	FunctionParameter   *fun_param;
 	FunctionParameterMode fun_param_mode;
@@ -180,7 +176,6 @@ static TypeName *TableFuncTypeName(List *columns);
 	ResTarget			*target;
 	struct PrivTarget	*privtarget;
 	AccessPriv			*accesspriv;
-
 	InsertStmt			*istmt;
 	VariableSetStmt		*vsetstmt;
 }
@@ -602,6 +597,7 @@ static TypeName *TableFuncTypeName(List *columns);
 %left		JOIN CROSS LEFT FULL RIGHT INNER_P NATURAL
 /* kluge to keep xml_whitespace_option from causing shift/reduce conflicts */
 %right		PRESERVE STRIP_P
+
 %%
 
 /*
@@ -10932,14 +10928,14 @@ reserved_keyword:
  * available from the scanner.
  */
 static void
-base_yyerror(YYLTYPE *yylloc, base_yyscan_t yyscanner, const char *msg)
+base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner, const char *msg)
 {
 	parser_yyerror(msg);
 }
 
 static Node *
 makeColumnRef(char *colname, List *indirection,
-			  int location, base_yyscan_t yyscanner)
+			  int location, core_yyscan_t yyscanner)
 {
 	/*
 	 * Generate a ColumnRef node, with an A_Indirection node added if there
@@ -11109,7 +11105,7 @@ makeBoolAConst(bool state, int location)
  * Create and populate a FuncCall node to support the OVERLAPS operator.
  */
 static FuncCall *
-makeOverlaps(List *largs, List *rargs, int location, base_yyscan_t yyscanner)
+makeOverlaps(List *largs, List *rargs, int location, core_yyscan_t yyscanner)
 {
 	FuncCall *n = makeNode(FuncCall);
 
@@ -11143,7 +11139,7 @@ makeOverlaps(List *largs, List *rargs, int location, base_yyscan_t yyscanner)
  * subscripts and '*', which we then must reject here.
  */
 static void
-check_qualified_name(List *names, base_yyscan_t yyscanner)
+check_qualified_name(List *names, core_yyscan_t yyscanner)
 {
 	ListCell   *i;
 
@@ -11160,7 +11156,7 @@ check_qualified_name(List *names, base_yyscan_t yyscanner)
  * and '*', which we then must reject here.
  */
 static List *
-check_func_name(List *names, base_yyscan_t yyscanner)
+check_func_name(List *names, core_yyscan_t yyscanner)
 {
 	ListCell   *i;
 
@@ -11178,7 +11174,7 @@ check_func_name(List *names, base_yyscan_t yyscanner)
  * in the grammar, so do it here.
  */
 static List *
-check_indirection(List *indirection, base_yyscan_t yyscanner)
+check_indirection(List *indirection, core_yyscan_t yyscanner)
 {
 	ListCell *l;
 
@@ -11237,7 +11233,7 @@ insertSelectOptions(SelectStmt *stmt,
 					List *sortClause, List *lockingClause,
 					Node *limitOffset, Node *limitCount,
 					WithClause *withClause,
-					base_yyscan_t yyscanner)
+					core_yyscan_t yyscanner)
 {
 	Assert(IsA(stmt, SelectStmt));
 
@@ -11463,12 +11459,9 @@ TableFuncTypeName(List *columns)
 }
 
 /*
- * Must undefine base_yylex before including scan.c, since we want it
- * to create the function base_yylex not filtered_base_yylex.
+ * Must undefine these macros before including scan.c, since scan.c supplies
+ * its own definitions of them.
  */
-#undef base_yylex
-
-/* Undefine some other stuff that would conflict in scan.c, too */
 #undef yyerror
 #undef yylval
 #undef yylloc
diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c
index 93632c88114..354e335ce91 100644
--- a/src/backend/parser/parser.c
+++ b/src/backend/parser/parser.c
@@ -14,7 +14,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.81 2009/07/14 20:24:10 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.82 2009/11/09 18:38:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -34,14 +34,15 @@
 List *
 raw_parser(const char *str)
 {
-	base_yyscan_t yyscanner;
+	core_yyscan_t yyscanner;
 	base_yy_extra_type yyextra;
 	int			yyresult;
 
 	/* initialize the flex scanner */
-	yyscanner = scanner_init(str, &yyextra, ScanKeywords, NumScanKeywords);
+	yyscanner = scanner_init(str, &yyextra.core_yy_extra,
+							 ScanKeywords, NumScanKeywords);
 
-	/* filtered_base_yylex() only needs this much initialization */
+	/* base_yylex() only needs this much initialization */
 	yyextra.have_lookahead = false;
 
 	/* initialize the bison parser */
@@ -73,15 +74,16 @@ raw_parser(const char *str)
 char *
 pg_parse_string_token(const char *token)
 {
-	base_yyscan_t yyscanner;
+	core_yyscan_t yyscanner;
 	base_yy_extra_type yyextra;
 	int			ctoken;
-	YYSTYPE		yylval;
+	core_YYSTYPE yylval;
 	YYLTYPE		yylloc;
 
-	yyscanner = scanner_init(token, &yyextra, ScanKeywords, NumScanKeywords);
+	yyscanner = scanner_init(token, &yyextra.core_yy_extra,
+							 ScanKeywords, NumScanKeywords);
 
-	ctoken = base_yylex(&yylval, &yylloc, yyscanner);
+	ctoken = core_yylex(&yylval, &yylloc, yyscanner);
 
 	if (ctoken != SCONST)		/* caller error */
 		elog(ERROR, "expected string constant, got token code %d", ctoken);
@@ -93,7 +95,7 @@ pg_parse_string_token(const char *token)
 
 
 /*
- * Intermediate filter between parser and base lexer (base_yylex in scan.l).
+ * Intermediate filter between parser and core lexer (core_yylex in scan.l).
  *
  * The filter is needed because in some cases the standard SQL grammar
  * requires more than one token lookahead.	We reduce these cases to one-token
@@ -104,26 +106,30 @@ pg_parse_string_token(const char *token)
  * words.  Furthermore it's not clear how to do it without re-introducing
  * scanner backtrack, which would cost more performance than this filter
  * layer does.
+ *
+ * The filter also provides a convenient place to translate between
+ * the core_YYSTYPE and YYSTYPE representations (which are really the
+ * same thing anyway, but notationally they're different).
  */
 int
-filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner)
+base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
 {
 	base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
 	int			cur_token;
 	int			next_token;
-	YYSTYPE		cur_yylval;
+	core_YYSTYPE cur_yylval;
 	YYLTYPE		cur_yylloc;
 
 	/* Get next token --- we might already have it */
 	if (yyextra->have_lookahead)
 	{
 		cur_token = yyextra->lookahead_token;
-		*lvalp = yyextra->lookahead_yylval;
+		lvalp->core_yystype = yyextra->lookahead_yylval;
 		*llocp = yyextra->lookahead_yylloc;
 		yyextra->have_lookahead = false;
 	}
 	else
-		cur_token = base_yylex(lvalp, llocp, yyscanner);
+		cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
 
 	/* Do we need to look ahead for a possible multiword token? */
 	switch (cur_token)
@@ -133,9 +139,9 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner)
 			/*
 			 * NULLS FIRST and NULLS LAST must be reduced to one token
 			 */
-			cur_yylval = *lvalp;
+			cur_yylval = lvalp->core_yystype;
 			cur_yylloc = *llocp;
-			next_token = base_yylex(lvalp, llocp, yyscanner);
+			next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
 			switch (next_token)
 			{
 				case FIRST_P:
@@ -147,11 +153,11 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner)
 				default:
 					/* save the lookahead token for next time */
 					yyextra->lookahead_token = next_token;
-					yyextra->lookahead_yylval = *lvalp;
+					yyextra->lookahead_yylval = lvalp->core_yystype;
 					yyextra->lookahead_yylloc = *llocp;
 					yyextra->have_lookahead = true;
 					/* and back up the output info to cur_token */
-					*lvalp = cur_yylval;
+					lvalp->core_yystype = cur_yylval;
 					*llocp = cur_yylloc;
 					break;
 			}
@@ -162,9 +168,9 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner)
 			/*
 			 * WITH TIME must be reduced to one token
 			 */
-			cur_yylval = *lvalp;
+			cur_yylval = lvalp->core_yystype;
 			cur_yylloc = *llocp;
-			next_token = base_yylex(lvalp, llocp, yyscanner);
+			next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
 			switch (next_token)
 			{
 				case TIME:
@@ -173,11 +179,11 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner)
 				default:
 					/* save the lookahead token for next time */
 					yyextra->lookahead_token = next_token;
-					yyextra->lookahead_yylval = *lvalp;
+					yyextra->lookahead_yylval = lvalp->core_yystype;
 					yyextra->lookahead_yylloc = *llocp;
 					yyextra->have_lookahead = true;
 					/* and back up the output info to cur_token */
-					*lvalp = cur_yylval;
+					lvalp->core_yystype = cur_yylval;
 					*llocp = cur_yylloc;
 					break;
 			}
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 150202e77ce..8a53221930c 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -24,7 +24,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.162 2009/09/27 03:27:23 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.163 2009/11/09 18:38:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -33,8 +33,8 @@
 #include <ctype.h>
 #include <unistd.h>
 
-#include "parser/gramparse.h"
-#include "parser/keywords.h"
+#include "parser/parser.h"				/* only needed for GUC variables */
+#include "parser/scanner.h"
 #include "parser/scansup.h"
 #include "mb/pg_wchar.h"
 
@@ -54,11 +54,16 @@ int				backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING;
 bool			escape_string_warning = true;
 bool			standard_conforming_strings = false;
 
+/*
+ * Set the type of YYSTYPE.
+ */
+#define YYSTYPE core_YYSTYPE
+
 /*
  * Set the type of yyextra.  All state variables used by the scanner should
  * be in yyextra, *not* statically allocated.
  */
-#define YY_EXTRA_TYPE base_yy_extra_type *
+#define YY_EXTRA_TYPE core_yy_extra_type *
 
 /*
  * Each call to yylex must set yylloc to the location of the found token
@@ -75,21 +80,22 @@ bool			standard_conforming_strings = false;
 #define ADVANCE_YYLLOC(delta)  ( *(yylloc) += (delta) )
 
 #define startlit()  ( yyextra->literallen = 0 )
-static void addlit(char *ytext, int yleng, base_yyscan_t yyscanner);
-static void addlitchar(unsigned char ychar, base_yyscan_t yyscanner);
-static char *litbufdup(base_yyscan_t yyscanner);
-static char *litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner);
-static unsigned char unescape_single_char(unsigned char c, base_yyscan_t yyscanner);
+static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner);
+static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
+static char *litbufdup(core_yyscan_t yyscanner);
+static char *litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner);
+static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
 static bool is_utf16_surrogate_first(pg_wchar c);
 static bool is_utf16_surrogate_second(pg_wchar c);
 static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second);
+static void addunicode(pg_wchar c, core_yyscan_t yyscanner);
 
 #define yyerror(msg)  scanner_yyerror(msg, yyscanner)
 
 #define lexer_errposition()  scanner_errposition(*(yylloc), yyscanner)
 
-static void check_string_escape_warning(unsigned char ychar, base_yyscan_t yyscanner);
-static void check_escape_warning(base_yyscan_t yyscanner);
+static void check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner);
+static void check_escape_warning(core_yyscan_t yyscanner);
 
 /*
  * Work around a bug in flex 2.5.35: it emits a couple of functions that
@@ -97,10 +103,8 @@ static void check_escape_warning(base_yyscan_t yyscanner);
  * this would cause warnings.  Providing our own declarations should be
  * harmless even when the bug gets fixed.
  */
-extern int	base_yyget_column(yyscan_t yyscanner);
-extern void base_yyset_column(int column_no, yyscan_t yyscanner);
-
-static void addunicode(pg_wchar c, yyscan_t yyscanner);
+extern int	core_yyget_column(yyscan_t yyscanner);
+extern void core_yyset_column(int column_no, yyscan_t yyscanner);
 
 %}
 
@@ -117,7 +121,7 @@ static void addunicode(pg_wchar c, yyscan_t yyscanner);
 %option noyyrealloc
 %option noyyfree
 %option warn
-%option prefix="base_yy"
+%option prefix="core_yy"
 
 /*
  * OK, here is a short description of lex/flex rules behavior.
@@ -958,7 +962,7 @@ other			.
  * to still be available.
  */
 int
-scanner_errposition(int location, base_yyscan_t yyscanner)
+scanner_errposition(int location, core_yyscan_t yyscanner)
 {
 	int		pos;
 
@@ -984,7 +988,7 @@ scanner_errposition(int location, base_yyscan_t yyscanner)
  * be misleading!
  */
 void
-scanner_yyerror(const char *message, base_yyscan_t yyscanner)
+scanner_yyerror(const char *message, core_yyscan_t yyscanner)
 {
 	const char *loc = yyextra->scanbuf + *yylloc;
 
@@ -1010,9 +1014,9 @@ scanner_yyerror(const char *message, base_yyscan_t yyscanner)
 /*
  * Called before any actual parsing is done
  */
-base_yyscan_t
+core_yyscan_t
 scanner_init(const char *str,
-			 base_yy_extra_type *yyext,
+			 core_yy_extra_type *yyext,
 			 const ScanKeyword *keywords,
 			 int num_keywords)
 {
@@ -1022,7 +1026,7 @@ scanner_init(const char *str,
 	if (yylex_init(&scanner) != 0)
 		elog(ERROR, "yylex_init() failed: %m");
 
-	base_yyset_extra(yyext, scanner);
+	core_yyset_extra(yyext, scanner);
 
 	yyext->keywords = keywords;
 	yyext->num_keywords = num_keywords;
@@ -1049,7 +1053,7 @@ scanner_init(const char *str,
  * Called after parsing is done to clean up after scanner_init()
  */
 void
-scanner_finish(base_yyscan_t yyscanner)
+scanner_finish(core_yyscan_t yyscanner)
 {
 	/*
 	 * We don't bother to call yylex_destroy(), because all it would do
@@ -1069,7 +1073,7 @@ scanner_finish(base_yyscan_t yyscanner)
 
 
 static void
-addlit(char *ytext, int yleng, base_yyscan_t yyscanner)
+addlit(char *ytext, int yleng, core_yyscan_t yyscanner)
 {
 	/* enlarge buffer if needed */
 	if ((yyextra->literallen + yleng) >= yyextra->literalalloc)
@@ -1087,7 +1091,7 @@ addlit(char *ytext, int yleng, base_yyscan_t yyscanner)
 
 
 static void
-addlitchar(unsigned char ychar, base_yyscan_t yyscanner)
+addlitchar(unsigned char ychar, core_yyscan_t yyscanner)
 {
 	/* enlarge buffer if needed */
 	if ((yyextra->literallen + 1) >= yyextra->literalalloc)
@@ -1106,7 +1110,7 @@ addlitchar(unsigned char ychar, base_yyscan_t yyscanner)
  * Create a palloc'd copy of literalbuf, adding a trailing null.
  */
 static char *
-litbufdup(base_yyscan_t yyscanner)
+litbufdup(core_yyscan_t yyscanner)
 {
 	int			llen = yyextra->literallen;
 	char	   *new;
@@ -1131,7 +1135,7 @@ hexval(unsigned char c)
 }
 
 static void
-check_unicode_value(pg_wchar c, char *loc, base_yyscan_t yyscanner)
+check_unicode_value(pg_wchar c, char *loc, core_yyscan_t yyscanner)
 {
 	if (GetDatabaseEncoding() == PG_UTF8)
 		return;
@@ -1161,8 +1165,25 @@ surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
 	return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
 }
 
+static void
+addunicode(pg_wchar c, core_yyscan_t yyscanner)
+{
+	char buf[8];	/* filled in by unicode_to_utf8() below */
+
+	if (c == 0 || c > 0x10FFFF)	/* zero and values above 0x10FFFF are rejected */
+		yyerror("invalid Unicode escape value");
+	if (c > 0x7F)	/* values above 007F are reported unless the server encoding is UTF8 */
+	{
+		if (GetDatabaseEncoding() != PG_UTF8)
+			yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
+		yyextra->saw_non_ascii = true;
+	}
+	unicode_to_utf8(c, (unsigned char *)buf);
+	addlit(buf, pg_mblen(buf), yyscanner);	/* append the converted bytes to the literal buffer */
+}
+
 static char *
-litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner)
+litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
 {
 	char *new;
 	char *litbuf, *in, *out;
@@ -1294,7 +1315,7 @@ litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner)
 }
 
 static unsigned char
-unescape_single_char(unsigned char c, base_yyscan_t yyscanner)
+unescape_single_char(unsigned char c, core_yyscan_t yyscanner)
 {
 	switch (c)
 	{
@@ -1318,7 +1339,7 @@ unescape_single_char(unsigned char c, base_yyscan_t yyscanner)
 }
 
 static void
-check_string_escape_warning(unsigned char ychar, base_yyscan_t yyscanner)
+check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner)
 {
 	if (ychar == '\'')
 	{
@@ -1345,7 +1366,7 @@ check_string_escape_warning(unsigned char ychar, base_yyscan_t yyscanner)
 }
 
 static void
-check_escape_warning(base_yyscan_t yyscanner)
+check_escape_warning(core_yyscan_t yyscanner)
 {
 	if (yyextra->warn_on_first_escape && escape_string_warning)
 		ereport(WARNING,
@@ -1362,13 +1383,13 @@ check_escape_warning(base_yyscan_t yyscanner)
  */
 
 void *
-base_yyalloc(yy_size_t bytes, base_yyscan_t yyscanner)
+core_yyalloc(yy_size_t bytes, core_yyscan_t yyscanner)
 {
 	return palloc(bytes);
 }
 
 void *
-base_yyrealloc(void *ptr, yy_size_t bytes, base_yyscan_t yyscanner)
+core_yyrealloc(void *ptr, yy_size_t bytes, core_yyscan_t yyscanner)
 {
 	if (ptr)
 		return repalloc(ptr, bytes);
@@ -1377,26 +1398,8 @@ base_yyrealloc(void *ptr, yy_size_t bytes, base_yyscan_t yyscanner)
 }
 
 void
-base_yyfree(void *ptr, base_yyscan_t yyscanner)
+core_yyfree(void *ptr, core_yyscan_t yyscanner)
 {
 	if (ptr)
 		pfree(ptr);
 }
-
-static void
-addunicode(pg_wchar c, base_yyscan_t yyscanner)
-{
-	char buf[8];
-
-	if (c == 0 || c > 0x10FFFF)
-		yyerror("invalid Unicode escape value");
-	if (c > 0x7F)
-	{
-		if (GetDatabaseEncoding() != PG_UTF8)
-			yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
-		yyextra->saw_non_ascii = true;
-	}
-	unicode_to_utf8(c, (unsigned char *)buf);
-	addlit(buf, pg_mblen(buf), yyscanner);
-}
-
diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h
index 09c99091361..41774028b5b 100644
--- a/src/include/parser/gramparse.h
+++ b/src/include/parser/gramparse.h
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.49 2009/11/05 23:24:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.50 2009/11/09 18:38:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,20 +20,11 @@
 #define GRAMPARSE_H
 
 #include "nodes/parsenodes.h"
-#include "parser/keywords.h"
+#include "parser/scanner.h"
 
 /*
- * We track token locations in terms of byte offsets from the start of the
- * source string, not the column number/line number representation that
- * bison uses by default.  Also, to minimize overhead we track only one
- * location (usually the first token location) for each construct, not
- * the beginning and ending locations as bison does by default.  It's
- * therefore sufficient to make YYLTYPE an int.
- */
-#define YYLTYPE  int
-
-/*
- * After defining YYLTYPE, it's safe to include gram.h.
+ * NB: include gram.h only AFTER including scanner.h, because scanner.h
+ * is what #defines YYLTYPE.
  */
 #include "parser/gram.h"
 
@@ -44,62 +35,24 @@
 typedef struct base_yy_extra_type
 {
 	/*
-	 * The string the lexer is physically scanning.  We keep this mainly so
-	 * that we can cheaply compute the offset of the current token (yytext).
+	 * Fields used by the core scanner.
 	 */
-	char	   *scanbuf;
-	Size		scanbuflen;
+	core_yy_extra_type core_yy_extra;
 
 	/*
-	 * The keyword list to use.
-	 */
-	const ScanKeyword *keywords;
-	int			num_keywords;
-
-	/*
-	 * literalbuf is used to accumulate literal values when multiple rules
-	 * are needed to parse a single literal.  Call startlit() to reset buffer
-	 * to empty, addlit() to add text.  NOTE: the string in literalbuf is
-	 * NOT necessarily null-terminated, but there always IS room to add a
-	 * trailing null at offset literallen.  We store a null only when we
-	 * need it.
-	 */
-	char	   *literalbuf;		/* palloc'd expandable buffer */
-	int			literallen;		/* actual current string length */
-	int			literalalloc;	/* current allocated buffer size */
-
-	int			xcdepth;		/* depth of nesting in slash-star comments */
-	char	   *dolqstart;		/* current $foo$ quote start string */
-
-	/* first part of UTF16 surrogate pair for Unicode escapes */
-	int32		utf16_first_part;
-
-	/* state variables for literal-lexing warnings */
-	bool		warn_on_first_escape;
-	bool		saw_non_ascii;
-
-	/*
-	 * State variables for filtered_base_yylex().
+	 * State variables for base_yylex().
 	 */
 	bool		have_lookahead;		/* is lookahead info valid? */
 	int			lookahead_token;	/* one-token lookahead */
-	YYSTYPE		lookahead_yylval;	/* yylval for lookahead token */
+	core_YYSTYPE lookahead_yylval;	/* yylval for lookahead token */
 	YYLTYPE		lookahead_yylloc;	/* yylloc for lookahead token */
 
 	/*
-	 * State variables that belong to the grammar, not the lexer.  It's
-	 * simpler to keep these here than to invent a separate structure.
-	 * These fields are unused/undefined if the lexer is invoked on its own.
+	 * State variables that belong to the grammar.
 	 */
-
 	List	   *parsetree;		/* final parse result is delivered here */
 } base_yy_extra_type;
 
-/*
- * The type of yyscanner is opaque outside scan.l.
- */
-typedef void *base_yyscan_t;
-
 /*
  * In principle we should use yyget_extra() to fetch the yyextra field
  * from a yyscanner struct.  However, flex always puts that field first,
@@ -110,22 +63,11 @@ typedef void *base_yyscan_t;
 
 
 /* from parser.c */
-extern int	filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
-								base_yyscan_t yyscanner);
-
-/* from scan.l */
-extern base_yyscan_t scanner_init(const char *str,
-								  base_yy_extra_type *yyext,
-								  const ScanKeyword *keywords,
-								  int num_keywords);
-extern void scanner_finish(base_yyscan_t yyscanner);
 extern int	base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
-					   base_yyscan_t yyscanner);
-extern int	scanner_errposition(int location, base_yyscan_t yyscanner);
-extern void scanner_yyerror(const char *message, base_yyscan_t yyscanner);
+					   core_yyscan_t yyscanner);
 
 /* from gram.y */
 extern void parser_init(base_yy_extra_type *yyext);
-extern int	base_yyparse(base_yyscan_t yyscanner);
+extern int	base_yyparse(core_yyscan_t yyscanner);
 
 #endif   /* GRAMPARSE_H */
diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h
new file mode 100644
index 00000000000..ccab1db862c
--- /dev/null
+++ b/src/include/parser/scanner.h
@@ -0,0 +1,120 @@
+/*-------------------------------------------------------------------------
+ *
+ * scanner.h
+ *		API for the core scanner (flex machine)
+ *
+ * The core scanner is also used by PL/pgsql, so we provide a public API
+ * for it.  However, the rest of the backend is only expected to use the
+ * higher-level API provided by parser.h.
+ *
+ *
+ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL: pgsql/src/include/parser/scanner.h,v 1.1 2009/11/09 18:38:48 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef SCANNER_H
+#define SCANNER_H
+
+#include "parser/keywords.h"
+
+/*
+ * The scanner returns extra data about scanned tokens in this union type.
+ * Note that this is a subset of the fields used in YYSTYPE of the bison
+ * parsers built atop the scanner.
+ */
+typedef union core_YYSTYPE
+{
+	int			ival;			/* for integer literals */
+	char	   *str;			/* for identifiers and non-integer literals */
+	const char *keyword;		/* canonical spelling of keywords */
+} core_YYSTYPE;
+
+/*
+ * We track token locations in terms of byte offsets from the start of the
+ * source string, not the column number/line number representation that
+ * bison uses by default.  Also, to minimize overhead we track only one
+ * location (usually the first token location) for each construct, not
+ * the beginning and ending locations as bison does by default.  It's
+ * therefore sufficient to make YYLTYPE an int.
+ */
+#define YYLTYPE  int
+
+/*
+ * Another important component of the scanner's API is the token code numbers.
+ * However, those are not defined in this file, because bison insists on
+ * defining them for itself.  The token codes used by the core scanner are
+ * the ASCII characters plus these:
+ *	%token <str>	IDENT FCONST SCONST BCONST XCONST Op
+ *	%token <ival>	ICONST PARAM
+ *	%token			TYPECAST DOT_DOT COLON_EQUALS
+ * The above token definitions *must* be the first ones declared in any
+ * bison parser built atop this scanner, so that they will have consistent
+ * numbers assigned to them (specifically, IDENT = 258 and so on).
+ */
+
+/*
+ * The YY_EXTRA data that a flex scanner allows us to pass around.
+ * Private state needed by the core scanner goes here.  Note that the actual
+ * yy_extra struct may be larger and have this as its first component, thus
+ * allowing the calling parser to keep some fields of its own in YY_EXTRA.
+ */
+typedef struct core_yy_extra_type
+{
+	/*
+	 * The string the scanner is physically scanning.  We keep this mainly so
+	 * that we can cheaply compute the offset of the current token (yytext).
+	 */
+	char	   *scanbuf;
+	Size		scanbuflen;
+
+	/*
+	 * The keyword list to use.
+	 */
+	const ScanKeyword *keywords;
+	int			num_keywords;
+
+	/*
+	 * literalbuf is used to accumulate literal values when multiple rules
+	 * are needed to parse a single literal.  Call startlit() to reset buffer
+	 * to empty, addlit() to add text.  NOTE: the string in literalbuf is
+	 * NOT necessarily null-terminated, but there always IS room to add a
+	 * trailing null at offset literallen.  We store a null only when we
+	 * need it.
+	 */
+	char	   *literalbuf;		/* palloc'd expandable buffer */
+	int			literallen;		/* actual current string length */
+	int			literalalloc;	/* current allocated buffer size */
+
+	int			xcdepth;		/* depth of nesting in slash-star comments */
+	char	   *dolqstart;		/* current $foo$ quote start string */
+
+	/* first part of UTF16 surrogate pair for Unicode escapes */
+	int32		utf16_first_part;
+
+	/* state variables for literal-lexing warnings */
+	bool		warn_on_first_escape;
+	bool		saw_non_ascii;
+} core_yy_extra_type;
+
+/*
+ * The type of yyscanner is opaque outside scan.l.
+ */
+typedef void *core_yyscan_t;
+
+
+/* Entry points in parser/scan.l */
+extern core_yyscan_t scanner_init(const char *str,
+								  core_yy_extra_type *yyext,
+								  const ScanKeyword *keywords,
+								  int num_keywords);
+extern void scanner_finish(core_yyscan_t yyscanner);
+extern int	core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
+					   core_yyscan_t yyscanner);
+extern int	scanner_errposition(int location, core_yyscan_t yyscanner);
+extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner);
+
+#endif   /* SCANNER_H */