Align ECPG lexer more closely with the core and psql lexers.

Make a bunch of basically-cosmetic changes to reduce the diffs between
the flex rules in scan.l, psqlscan.l, and pgc.l.  Reorder some code,
adjust a lot of whitespace, sync some comments, make use of flex start
condition scopes to do that.

There are a few non-cosmetic changes in the ECPG lexer:

* Bring over the decimalfail rule (and support function
  process_integer_literal) so that ECPG will lex "1..10" into the same
  tokens as the backend would.  I'm not sure this makes any visible
  difference to users, but I'm not sure it doesn't, either.

* <xdc><<EOF>> gets its own rule so as to produce a more on-point
  error message.

* Remove duplicate <SQL>{xdstart} rule.

John Naylor, with a few additional changes by me

Discussion: https://postgr.es/m/CAJVSVGWGqY9YBs2EwtRUkbNv=hXkN8yRPOoD1wxE6COgvvrz5g@mail.gmail.com
2025-11-16 15:02:33 +03:00 · 2018-11-13 12:57:52 -05:00
parent d20dceaf50
commit ec937d0805
3 changed files with 623 additions and 471 deletions
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -6,7 +6,8 @@
 *
 * NOTE NOTE NOTE:
 *
- * The rules in this file must be kept in sync with src/fe_utils/psqlscan.l!
+ * The rules in this file must be kept in sync with src/fe_utils/psqlscan.l
+ * and src/interfaces/ecpg/preproc/pgc.l!
 *
 * The rules are designed so that the scanner never has to backtrack,
 * in the sense that there is always a rule that can match the input
@@ -168,8 +169,8 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
 %x xc
 %x xd
 %x xh
-%x xe
 %x xq
+%x xe
 %x xdolq
 %x xui
 %x xuiend
@@ -192,7 +193,7 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 *
 * XXX if you change the set of whitespace characters, fix scanner_isspace()
- * to agree, and see also the plpgsql lexer.
+ * to agree.
 */

 space			[ \t\n\r\f]
@@ -417,32 +418,36 @@ other			.
 					yyless(2);
 				}

-<xc>{xcstart}	{
+<xc>{
+{xcstart}		{
 					(yyextra->xcdepth)++;
 					/* Put back any characters past slash-star; see above */
 					yyless(2);
 				}

-<xc>{xcstop}	{
+{xcstop}		{
 					if (yyextra->xcdepth <= 0)
 						BEGIN(INITIAL);
 					else
 						(yyextra->xcdepth)--;
 				}

-<xc>{xcinside}	{
+{xcinside}		{
 					/* ignore */
 				}

-<xc>{op_chars}	{
+{op_chars}		{
 					/* ignore */
 				}

-<xc>\*+			{
+\*+				{
 					/* ignore */
 				}

-<xc><<EOF>>		{ yyerror("unterminated /* comment"); }
+<<EOF>>			{
+					yyerror("unterminated /* comment");
+				}
+} /* <xc> */

 {xbstart}		{
 					/* Binary bit type.
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -23,6 +23,7 @@
 *
 * See psqlscan_int.h for additional commentary.
 *
+ *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
@@ -39,6 +40,9 @@
 }

 %{
+
+/* LCOV_EXCL_START */
+
 #include "fe_utils/psqlscan_int.h"

 /*
@@ -71,8 +75,6 @@ typedef int YYSTYPE;
 extern int	psql_yyget_column(yyscan_t yyscanner);
 extern void psql_yyset_column(int column_no, yyscan_t yyscanner);

-/* LCOV_EXCL_START */
-
 %}

 %option reentrant
@@ -128,8 +130,8 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
 %x xc
 %x xd
 %x xh
-%x xe
 %x xq
+%x xe
 %x xdolq
 %x xui
 %x xuiend
@@ -151,7 +153,7 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 *
 * XXX if you change the set of whitespace characters, fix scanner_isspace()
- * to agree, and see also the plpgsql lexer.
+ * to agree.
 */

 space			[ \t\n\r\f]
@@ -402,14 +404,15 @@ other			.
 					ECHO;
 				}

-<xc>{xcstart}	{
+<xc>{
+{xcstart}		{
 					cur_state->xcdepth++;
 					/* Put back any characters past slash-star; see above */
 					yyless(2);
 					ECHO;
 				}

-<xc>{xcstop}	{
+{xcstop}		{
 					if (cur_state->xcdepth <= 0)
 						BEGIN(INITIAL);
 					else
@@ -417,17 +420,18 @@ other			.
 					ECHO;
 				}

-<xc>{xcinside}	{
+{xcinside}		{
 					ECHO;
 				}

-<xc>{op_chars}	{
+{op_chars}		{
 					ECHO;
 				}

-<xc>\*+			{
+\*+				{
 					ECHO;
 				}
+} /* <xc> */

 {xbstart}		{
 					BEGIN(xb);
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -10,7 +10,6 @@
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- *
 * IDENTIFICATION
 *	  src/interfaces/ecpg/preproc/pgc.l
 *
@@ -28,6 +27,9 @@
 }

 %{
+
+/* LCOV_EXCL_START */
+
 extern YYSTYPE base_yylval;

 static int		xcdepth = 0;	/* depth of nesting in slash-star comments */
@@ -53,8 +55,9 @@ static bool		include_next;

 #define startlit()	(literalbuf[0] = '\0', literallen = 0)
 static void addlit(char *ytext, int yleng);
-static void addlitchar (unsigned char);
-static void parse_include (void);
+static void addlitchar(unsigned char);
+static int	process_integer_literal(const char *token, YYSTYPE *lval);
+static void parse_include(void);
 static bool ecpg_isspace(char ch);
 static bool isdefine(void);
 static bool isinformixdefine(void);
@@ -81,8 +84,6 @@ static struct _if_value
 	short else_branch;
 } stacked_if_value[MAX_NESTED_IF];

-/* LCOV_EXCL_START */
-
 %}

 %option 8bit
@@ -91,11 +92,8 @@ static struct _if_value
 %option noinput
 %option noyywrap
 %option warn
-%option prefix="base_yy"
-
 %option yylineno
-
-%x C SQL incl def def_ident undef
+%option prefix="base_yy"

 /*
 * OK, here is a short description of lex/flex rules behavior.
@@ -111,15 +109,21 @@ static struct _if_value
 *  <xb> bit string literal
 *  <xcc> extended C-style comments in C
 *  <xcsql> extended C-style comments in SQL
- *	<xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
- *	<xh> hexadecimal numeric string - thomas 1997-11-16
- *	<xq> standard quoted strings - thomas 1997-07-30
- *	<xqc> standard quoted strings in C - michael
- *	<xe> extended quoted strings (support backslash escape sequences)
+ *  <xd> delimited identifiers (double-quoted identifiers)
+ *  <xdc> double-quoted strings in C
+ *  <xh> hexadecimal numeric string
 *  <xn> national character quoted strings
+ *  <xq> standard quoted strings
+ *  <xe> extended quoted strings (support backslash escape sequences)
+ *  <xqc> single-quoted strings in C
 *  <xdolq> $foo$ quoted strings
 *  <xui> quoted identifier with Unicode escapes
 *  <xus> quoted string with Unicode escapes
+ *  <xcond> condition of an EXEC SQL IFDEF construct
+ *  <xskip> skipping the inactive part of an EXEC SQL IFDEF construct
+ *
+ * Remember to add an <<EOF>> case whenever you add a new exclusive state!
+ * The default one is probably not the right thing.
 */

 %x xb
@@ -128,15 +132,60 @@ static struct _if_value
 %x xd
 %x xdc
 %x xh
-%x xe
 %x xn
 %x xq
+%x xe
 %x xqc
 %x xdolq
-%x xcond
-%x xskip
 %x xui
 %x xus
+%x xcond
+%x xskip
+
+/* Additional exclusive states that are specific to ECPG */
+%x C SQL incl def def_ident undef
+
+/*
+ * In order to make the world safe for Windows and Mac clients as well as
+ * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
+ * sequence will be seen as two successive newlines, but that doesn't cause
+ * any problems.  SQL-style comments, which start with -- and extend to the
+ * next newline, are treated as equivalent to a single whitespace character.
+ *
+ * NOTE a fine point: if there is no newline following --, we will absorb
+ * everything to the end of the input as a comment.  This is correct.  Older
+ * versions of Postgres failed to recognize -- as a comment if the input
+ * did not end with a newline.
+ *
+ * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ *
+ * XXX if you change the set of whitespace characters, fix ecpg_isspace()
+ * to agree.
+ */
+
+space			[ \t\n\r\f]
+horiz_space		[ \t\f]
+newline			[\n\r]
+non_newline		[^\n\r]
+
+comment			("--"{non_newline}*)
+
+whitespace		({space}+|{comment})
+
+/*
+ * SQL requires at least one newline in the whitespace separating
+ * string literals that are to be concatenated.  Silly, but who are we
+ * to argue?  Note that {whitespace_with_newline} should not have * after
+ * it, whereas {whitespace} should generally have a * after it...
+ */
+
+horiz_whitespace		({horiz_space}|{comment})
+whitespace_with_newline	({horiz_whitespace}*{newline}{whitespace}*)
+
+quote			'
+quotestop		{quote}{whitespace}*
+quotecontinue	{quote}{whitespace_with_newline}{quote}
+quotefail		{quote}{whitespace}*"-"

 /* Bit string
 */
@@ -158,9 +207,6 @@ xeoctesc		[\\][0-7]{1,3}
 xehexesc		[\\]x[0-9A-Fa-f]{1,2}
 xeunicode		[\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})

-/* C version of hex number */
-xch				0[xX][0-9A-Fa-f]*
-
 /* Extended quote
 * xqdouble implements embedded quote, ''''
 */
@@ -194,7 +240,9 @@ xddouble		{dquote}{dquote}
 xdinside		[^"]+

 /* Unicode escapes */
-/* (The ecpg scanner is not backup-free, so the fail rules in scan.l are not needed here, but could be added if desired.) */
+/* (The ecpg scanner is not backup-free, so the fail rules in scan.l are
+ * not needed here, but could be added if desired.)
+ */
 uescape			[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}

 /* Quoted identifier with Unicode escapes */
@@ -211,6 +259,7 @@ xdcqdq			\\\"
 xdcother		[^"]
 xdcinside		({xdcqq}|{xdcqdq}|{xdcother})

+
 /* C-style comments
 *
 * The "extended comment" syntax closely resembles allowable operator syntax.
@@ -278,68 +327,40 @@ operator		{op_chars}+
 * instead we pass it separately to parser. there it gets
 * coerced via doNegate() -- Leon aug 20 1999
 *
+ * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
+ *
 * {realfail1} and {realfail2} are added to prevent the need for scanner
 * backup when the {real} rule fails to match completely.
 */

 integer			{digit}+
 decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
+decimalfail		{digit}+\.\.
 real			({integer}|{decimal})[Ee][-+]?{digit}+
 realfail1		({integer}|{decimal})[Ee]
 realfail2		({integer}|{decimal})[Ee][-+]

 param			\${integer}

-/*
- * In order to make the world safe for Windows and Mac clients as well as
- * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
- * sequence will be seen as two successive newlines, but that doesn't cause
- * any problems.  SQL-style comments, which start with -- and extend to the
- * next newline, are treated as equivalent to a single whitespace character.
- *
- * NOTE a fine point: if there is no newline following --, we will absorb
- * everything to the end of the input as a comment.  This is correct.  Older
- * versions of Postgres failed to recognize -- as a comment if the input
- * did not end with a newline.
- *
- * XXX perhaps \f (formfeed) should be treated as a newline as well?
- *
- * XXX if you change the set of whitespace characters, fix ecpg_isspace()
- * to agree.
- */
-
-ccomment		"//".*\n
-
-space			[ \t\n\r\f]
-horiz_space		[ \t\f]
-newline			[\n\r]
-non_newline		[^\n\r]
-
-comment			("--"{non_newline}*)
-
-whitespace		({space}+|{comment})
-
-/*
- * SQL requires at least one newline in the whitespace separating
- * string literals that are to be concatenated.  Silly, but who are we
- * to argue?  Note that {whitespace_with_newline} should not have * after
- * it, whereas {whitespace} should generally have a * after it...
- */
-
-horiz_whitespace	({horiz_space}|{comment})
-whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
-
-quote			'
-quotestop		{quote}{whitespace}*
-quotecontinue	{quote}{whitespace_with_newline}{quote}
-quotefail		{quote}{whitespace}*"-"
-
 /* special characters for other dbms */
 /* we have to react differently in compat mode */
 informix_special	[\$]

 other			.

+/*
+ * Dollar quoted strings are totally opaque, and no escaping is done on them.
+ * Other quoted strings must allow some special characters such as single-quote
+ *  and newline.
+ * Embedded single-quotes are implemented both in the SQL standard
+ *  style of two adjacent single quotes "''" and in the Postgres/Java style
+ *  of escaped-quote "\'".
+ * Other embedded escaped characters are matched explicitly and the leading
+ *  backslash is dropped from the string.
+ * Note that xcstart must appear before operator, as explained above!
+ *  Also whitespace (comment) must appear before operator.
+ */
+
 /* some stuff needed for ecpg */
 exec			[eE][xX][eE][cC]
 sql				[sS][qQ][lL]
@@ -349,6 +370,11 @@ include_next	[iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
 import			[iI][mM][pP][oO][rR][tT]
 undef			[uU][nN][dD][eE][fF]

+/* C version of hex number */
+xch				0[xX][0-9A-Fa-f]*
+
+ccomment		"//".*\n
+
 if				[iI][fF]
 ifdef			[iI][fF][dD][eE][fF]
 ifndef			[iI][fF][nN][dD][eE][fF]
@@ -366,24 +392,12 @@ ip				{ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
 cppinclude		{space}*#{include}{space}*
 cppinclude_next		{space}*#{include_next}{space}*

-/* take care of cpp lines, they may also be continuated */
+/* take care of cpp lines, they may also be continued */
 /* first a general line for all commands not starting with "i" */
 /* and then the other commands starting with "i", we have to add these
- * separately because the cppline production would match on "include" too */
-cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+\/)|.|\\{space}*{newline})*{newline}
-
-/*
- * Dollar quoted strings are totally opaque, and no escaping is done on them.
- * Other quoted strings must allow some special characters such as single-quote
- *	and newline.
- * Embedded single-quotes are implemented both in the SQL standard
- *	style of two adjacent single quotes "''" and in the Postgres/Java style
- *	of escaped-quote "\'".
- * Other embedded escaped characters are matched explicitly and the leading
- *	backslash is dropped from the string. - thomas 1997-09-24
- * Note that xcstart must appear before operator, as explained above!
- *	Also whitespace (comment) must appear before operator.
+ * separately because the cppline production would match on "include" too
 */
+cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+\/)|.|\\{space}*{newline})*{newline}

 %%

@@ -392,7 +406,21 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 		token_start = NULL;
 %}

-<SQL>{whitespace}	{ /* ignore */ }
+<SQL>{
+{whitespace}	{
+					/* ignore */
+				}
+
+{xcstart}		{
+					token_start = yytext;
+					state_before = YYSTATE;
+					xcdepth = 0;
+					BEGIN(xcsql);
+					/* Put back any characters past slash-star; see above */
+					yyless(2);
+					fputs("/*", yyout);
+				}
+} /* <SQL> */

 <C>{xcstart}	{
 					token_start = yytext;
@@ -403,15 +431,6 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					yyless(2);
 					fputs("/*", yyout);
 				}
-<SQL>{xcstart}		{
-					token_start = yytext;
-					state_before = YYSTATE;
-					xcdepth = 0;
-					BEGIN(xcsql);
-					/* Put back any characters past slash-star; see above */
-					yyless(2);
-					fputs("/*", yyout);
-				}
 <xcc>{xcstart}	{ ECHO; }
 <xcsql>{xcstart}	{
 					xcdepth++;
@@ -437,18 +456,34 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					BEGIN(state_before);
 					token_start = NULL;
 				}
-<xcc,xcsql>{xcinside}	{ ECHO; }
-<xcc,xcsql>{op_chars}	{ ECHO; }
-<xcc,xcsql>\*+		{ ECHO; }

-<xcc,xcsql><<EOF>>		{ mmfatal(PARSE_ERROR, "unterminated /* comment"); }
+<xcc,xcsql>{
+{xcinside}		{
+					ECHO;
+				}

-<SQL>{xbstart}	{
+{op_chars}		{
+					ECHO;
+				}
+
+\*+				{
+					ECHO;
+				}
+
+<<EOF>>			{
+					mmfatal(PARSE_ERROR, "unterminated /* comment");
+				}
+} /* <xcc,xcsql> */
+
+<SQL>{
+{xbstart}		{
 					token_start = yytext;
 					BEGIN(xb);
 					startlit();
 					addlitchar('b');
 				}
+} /* <SQL> */
+
 <xb>{quotestop}	|
 <xb>{quotefail} {
 					yyless(1);
@@ -458,11 +493,14 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					base_yylval.str = mm_strdup(literalbuf);
 					return BCONST;
 				}
-
 <xh>{xhinside}	|
-<xb>{xbinside}	{ addlit(yytext, yyleng); }
+<xb>{xbinside}	{
+					addlit(yytext, yyleng);
+				}
 <xh>{quotecontinue}	|
-<xb>{quotecontinue}	{ /* ignore */ }
+<xb>{quotecontinue}	{
+					/* ignore */
+				}
 <xb><<EOF>>		{ mmfatal(PARSE_ERROR, "unterminated bit string literal"); }

 <SQL>{xhstart}	{
@@ -480,7 +518,16 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 				}

 <xh><<EOF>>		{ mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal"); }
-<SQL>{xnstart} {
+
+<C>{xqstart}	{
+					token_start = yytext;
+					state_before = YYSTATE;
+					BEGIN(xqc);
+					startlit();
+				}
+
+<SQL>{
+{xnstart}		{
 					/* National character.
 					 * Transfer it as-is to the backend.
 					 */
@@ -489,31 +536,28 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					BEGIN(xn);
 					startlit();
 				}
-<C>{xqstart}	{
-				token_start = yytext;
-				state_before = YYSTATE;
-				BEGIN(xqc);
-				startlit();
-			}
-<SQL>{xqstart}	{
+
+{xqstart}		{
 					token_start = yytext;
 					state_before = YYSTATE;
 					BEGIN(xq);
 					startlit();
 				}
-<SQL>{xestart}	{
+{xestart}		{
 					token_start = yytext;
 					state_before = YYSTATE;
 					BEGIN(xe);
 					startlit();
 				}
-<SQL>{xusstart}	{
+{xusstart}		{
 					token_start = yytext;
 					state_before = YYSTATE;
 					BEGIN(xus);
 					startlit();
 					addlit(yytext, yyleng);
 				}
+} /* <SQL> */
+
 <xq,xqc>{quotestop} |
 <xq,xqc>{quotefail} {
 					yyless(1);
@@ -547,24 +591,32 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					addlitchar('\'');
 				}
 <xq,xqc,xn,xus>{xqinside}	{ addlit(yytext, yyleng); }
-<xe>{xeinside}		{ addlit(yytext, yyleng); }
-<xe>{xeunicode}		{ addlit(yytext, yyleng); }
-<xe>{xeescape}		{ addlit(yytext, yyleng); }
-<xe>{xeoctesc}		{ addlit(yytext, yyleng); }
-<xe>{xehexesc}		{ addlit(yytext, yyleng); }
-<xq,xqc,xe,xn,xus>{quotecontinue}	{ /* ignore */ }
+<xe>{xeinside}  {
+					addlit(yytext, yyleng);
+				}
+<xe>{xeunicode} {
+					addlit(yytext, yyleng);
+				}
+<xe>{xeescape}  {
+					addlit(yytext, yyleng);
+				}
+<xe>{xeoctesc}  {
+					addlit(yytext, yyleng);
+				}
+<xe>{xehexesc}  {
+					addlit(yytext, yyleng);
+				}
+<xq,xqc,xe,xn,xus>{quotecontinue}	{
+					/* ignore */
+				}
 <xe>.			{
 					/* This is only needed for \ just before EOF */
 					addlitchar(yytext[0]);
 				}
 <xq,xqc,xe,xn,xus><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated quoted string"); }
-<SQL>{dolqfailed}	{
-				/* throw back all but the initial "$" */
-				yyless(1);
-				/* and treat it as {other} */
-				return yytext[0];
-			}
-<SQL>{dolqdelim} {
+
+<SQL>{
+{dolqdelim}		{
 					token_start = yytext;
 					if (dolqstart)
 						free(dolqstart);
@@ -573,6 +625,14 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					startlit();
 					addlit(yytext, yyleng);
 				}
+{dolqfailed}	{
+					/* throw back all but the initial "$" */
+					yyless(1);
+					/* and treat it as {other} */
+					return yytext[0];
+				}
+} /* <SQL> */
+
 <xdolq>{dolqdelim} {
 					if (strcmp(yytext, dolqstart) == 0)
 					{
@@ -590,28 +650,36 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 						 * the $... part to the output, but put back the final
 						 * $ for rescanning.  Consider $delim$...$junk$delim$
 						 */
-					addlit(yytext, yyleng-1);
-					yyless(yyleng-1);
+						addlit(yytext, yyleng - 1);
+						yyless(yyleng - 1);
 					}
 				}
-<xdolq>{dolqinside}	{ addlit(yytext, yyleng); }
-<xdolq>{dolqfailed}	{ addlit(yytext, yyleng); }
-<xdolq>{other}		{
+<xdolq>{dolqinside} {
+					addlit(yytext, yyleng);
+				}
+<xdolq>{dolqfailed} {
+					addlit(yytext, yyleng);
+				}
+<xdolq>.		{
 					/* single quote or dollar sign */
 					addlitchar(yytext[0]);
 				}
-<xdolq><<EOF>>		{ base_yyerror("unterminated dollar-quoted string"); }
-<SQL>{xdstart}		{
+<xdolq><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated dollar-quoted string"); }
+
+<SQL>{
+{xdstart}		{
 					state_before = YYSTATE;
 					BEGIN(xd);
 					startlit();
 				}
-<SQL>{xuistart}		{
+{xuistart}		{
 					state_before = YYSTATE;
 					BEGIN(xui);
 					startlit();
 					addlit(yytext, yyleng);
 				}
+} /* <SQL> */
+
 <xd>{xdstop}	{
 					BEGIN(state_before);
 					if (literallen == 0)
@@ -634,24 +702,59 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					base_yylval.str = mm_strdup(literalbuf);
 					return UIDENT;
 				}
-<xd,xui>{xddouble}		{ addlitchar('"'); }
-<xd,xui>{xdinside}		{ addlit(yytext, yyleng); }
-<xd,xdc,xui><<EOF>>		{ mmfatal(PARSE_ERROR, "unterminated quoted identifier"); }
-<C,SQL>{xdstart}	{
+<xd,xui>{xddouble}	{
+					addlitchar('"');
+				}
+<xd,xui>{xdinside}	{
+					addlit(yytext, yyleng);
+				}
+<xd,xui><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated quoted identifier"); }
+<C>{xdstart}	{
 					state_before = YYSTATE;
 					BEGIN(xdc);
 					startlit();
 				}
-<xdc>{xdcinside}	{ addlit(yytext, yyleng); }
-<SQL>{typecast}		{ return TYPECAST; }
-<SQL>{dot_dot}		{ return DOT_DOT; }
-<SQL>{colon_equals}	{ return COLON_EQUALS; }
-<SQL>{equals_greater} { return EQUALS_GREATER; }
-<SQL>{less_equals}	{ return LESS_EQUALS; }
-<SQL>{greater_equals} { return GREATER_EQUALS; }
-<SQL>{less_greater}	{ return NOT_EQUALS; }
-<SQL>{not_equals}	{ return NOT_EQUALS; }
-<SQL>{informix_special}	{
+<xdc>{xdcinside}	{
+					addlit(yytext, yyleng);
+				}
+<xdc><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated quoted string"); }
+
+<SQL>{
+{typecast}		{
+					return TYPECAST;
+				}
+
+{dot_dot}		{
+					return DOT_DOT;
+				}
+
+{colon_equals}	{
+					return COLON_EQUALS;
+				}
+
+{equals_greater} {
+					return EQUALS_GREATER;
+				}
+
+{less_equals}	{
+					return LESS_EQUALS;
+				}
+
+{greater_equals} {
+					return GREATER_EQUALS;
+				}
+
+{less_greater}	{
+					/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
+					return NOT_EQUALS;
+				}
+
+{not_equals}	{
+					/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
+					return NOT_EQUALS;
+				}
+
+{informix_special}	{
 			  /* are we simulating Informix? */
 				if (INFORMIX_MODE)
 				{
@@ -660,7 +763,9 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 				else
 					return yytext[0];
 				}
-<SQL>{self}			{ /*
+
+{self}			{
+					/*
 					 * We may find a ';' inside a structure
 					 * definition in a TYPE or VAR statement.
 					 * This is not an EOL marker.
@@ -669,7 +774,8 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 						BEGIN(C);
 					return yytext[0];
 				}
-<SQL>{operator}		{
+
+{operator}		{
 					/*
 					 * Check for embedded slash-star or dash-dash; those
 					 * are comment starts, so operator must stop there.
@@ -765,52 +871,61 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					base_yylval.str = mm_strdup(yytext);
 					return Op;
 				}
-<SQL>{param}		{
+
+{param}			{
 					base_yylval.ival = atol(yytext+1);
 					return PARAM;
 				}
-<C,SQL>{integer}	{
-						int val;
-						char* endptr;

-						errno = 0;
-						val = strtoint(yytext, &endptr, 10);
-						if (*endptr != '\0' || errno == ERANGE)
-						{
-							errno = 0;
-							base_yylval.str = mm_strdup(yytext);
-							return FCONST;
-						}
-						base_yylval.ival = val;
-						return ICONST;
-					}
-<SQL>{ip}			{
+{ip}			{
 					base_yylval.str = mm_strdup(yytext);
 					return IP;
 				}
-<C,SQL>{decimal}	{
+}  /* <SQL> */
+
+<C,SQL>{
+{integer}		{
+					return process_integer_literal(yytext, &base_yylval);
+				}
+{decimal}		{
 					base_yylval.str = mm_strdup(yytext);
 					return FCONST;
 				}
-<C,SQL>{real}		{
+{decimalfail}	{
+					/* throw back the .., and treat as integer */
+					yyless(yyleng - 2);
+					return process_integer_literal(yytext, &base_yylval);
+				}
+{real}			{
 					base_yylval.str = mm_strdup(yytext);
 					return FCONST;
 				}
-<SQL>{realfail1}	{
-						yyless(yyleng-1);
+{realfail1}		{
+					/*
+					 * throw back the [Ee], and treat as {decimal}.  Note
+					 * that it is possible the input is actually {integer},
+					 * but since this case will almost certainly lead to a
+					 * syntax error anyway, we don't bother to distinguish.
+					 */
+					yyless(yyleng - 1);
 					base_yylval.str = mm_strdup(yytext);
 					return FCONST;
 				}
-<SQL>{realfail2}	{
-						yyless(yyleng-2);
+{realfail2}		{
+					/* throw back the [Ee][+-], and proceed as above */
+					yyless(yyleng - 2);
 					base_yylval.str = mm_strdup(yytext);
 					return FCONST;
 				}
-<SQL>:{identifier}((("->"|\.){identifier})|(\[{array}\]))*	{
+} /* <C,SQL> */
+
+<SQL>{
+:{identifier}((("->"|\.){identifier})|(\[{array}\]))*	{
 					base_yylval.str = mm_strdup(yytext+1);
 					return CVARIABLE;
 				}
-<SQL>{identifier}	{
+
+{identifier}	{
 					const ScanKeyword  *keyword;

 					if (!isdefine())
@@ -837,7 +952,16 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 						return IDENT;
 					}
 				}
-<SQL>{other}		{ return yytext[0]; }
+
+{other}			{
+					return yytext[0];
+				}
+} /* <SQL> */
+
+	/*
+	 * Begin ECPG-specific rules
+	 */
+
 <C>{exec_sql}		{ BEGIN(SQL); return SQL_START; }
 <C>{informix_special}	{
 						/* are we simulating Informix? */
@@ -1288,6 +1412,7 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+

 					}
 				}
+
 <INITIAL>{other}|\n	{ mmfatal(PARSE_ERROR, "internal error: unreachable state; please report this to <pgsql-bugs@postgresql.org>"); }

 %%
@@ -1350,6 +1475,24 @@ addlitchar(unsigned char ychar)
 	literalbuf[literallen] = '\0';
 }

+static int
+process_integer_literal(const char *token, YYSTYPE *lval)
+{
+	int			val;
+	char	   *endptr;
+
+	errno = 0;
+	val = strtoint(token, &endptr, 10);
+	if (*endptr != '\0' || errno == ERANGE)
+	{
+		/* integer too large, treat it as a float */
+		lval->str = mm_strdup(token);
+		return FCONST;
+	}
+	lval->ival = val;
+	return ICONST;
+}
+
 static void
 parse_include(void)
 {