ecpg: re-implement preprocessor's string management.

Most productions in the preprocessor grammar construct strings representing SQL or C statements or fragments thereof. Instead of returning these as <str> results of the productions, return them as "location" values, taking advantage of Bison's flexibility about what a location is. We aren't really giving up anything thereby, since ecpg's error reports have always just given line numbers, and that's tracked separately. The advantage of this is that a single instance of the YYLLOC_DEFAULT macro can perform all the work needed by the vast majority of productions, including all the ones made automatically by parse.pl. This avoids having large numbers of effectively-identical productions, which tickles an optimization inefficiency in recent versions of clang. (This patch reduces the compilation time for preproc.o by more than 100-fold with clang 16, and is visibly helpful with gcc too.) The compiled parser is noticeably smaller as well. A disadvantage of this approach is that YYLLOC_DEFAULT is applied before running the production's semantic action (if any). This means it cannot use the method favored by cat_str() of free'ing all the input strings; if the action needs to look at the input strings, it'd be looking at dangling storage. As this stands, therefore, it leaks memory like a sieve. This is already a big patch though, and fixing the memory management seems like a separable problem, so let's leave that for the next step. (This does remove some free() calls that I'd have had to touch anyway, in the expectation that the next step will manage memory reclamation quite differently.) Most of the changes here are mindless substitution of "@N" for "$N" in grammar rules; see the changes to README.parser for an explanation. Discussion: https://postgr.es/m/2011420.1713493114@sss.pgh.pa.us
2025-10-28 11:55:03 +03:00 · 2024-10-14 13:44:42 -04:00
parent 6b00549944
commit a542d5614b
9 changed files with 740 additions and 1204 deletions
--- a/src/interfaces/ecpg/preproc/README.parser
+++ b/src/interfaces/ecpg/preproc/README.parser
@@ -4,8 +4,8 @@ Some notes:

 1) Most input matching core grammar productions is simply converted
   to strings and concatenated together to form the SQL string
-   passed to the server.  parse.pl can automatically build the
-   grammar actions needed to do this.
+   passed to the server.  This is handled mostly automatically,
+   as described below.
 2) Some grammar rules need special actions that are added to or
   completely override the default token-concatenation behavior.
   This is controlled by ecpg.addons as explained below.
@@ -14,11 +14,31 @@ Some notes:
 4) ecpg.header contains the "prologue" part of preproc.y, including
   support functions, Bison options, etc.
 5) Additional terminals added by ECPG must be defined in ecpg.tokens.
-   Additional nonterminals added by ECPG must be defined in ecpg.type.
+   Additional nonterminals added by ECPG must be defined in ecpg.type,
+   but only if they have a non-void result type, which most don't.

 ecpg.header, ecpg.tokens, ecpg.type, and ecpg.trailer are just
 copied verbatim into preproc.y at appropriate points.

+
+In the pre-v18 implementation of ecpg, the strings constructed
+by grammar rules were returned as the Bison result of each rule.
+This led to a large number of effectively-identical rule actions,
+which caused compilation-time problems with some versions of clang.
+Now, rules that need to return a string are declared as having
+void type (which in Bison means leaving out any %type declaration
+for them).  Instead, we abuse Bison's "location tracking" mechanism
+to carry the string results, which allows a single YYLLOC_DEFAULT
+call to handle the standard token-concatenation behavior for the
+vast majority of the rules.  Rules that don't need to do anything
+else can omit a semantic action altogether.  Rules that need to
+construct an output string specially can do so, but they should
+assign it to "@$" rather than the usual "$$"; also, to reference
+the string value of the N'th input token, write "@N" not "$N".
+(But rules that return something other than a simple string
+continue to use the normal Bison notations.)
+
+
 ecpg.addons contains entries that begin with a line like
       ECPG: concattokens ruletype
 and typically have one or more following lines that are the code
@@ -69,9 +89,9 @@ parse.pl contains some tables that list backend grammar
 productions to be ignored or modified.

 Nonterminals that construct strings (as described above) should be
-given <str> type, which is parse.pl's default assumption for
-nonterminals found in gram.y.  That can be overridden at need by
-making an entry in parse.pl's %replace_types table.  %replace_types
+given void type, which is parse.pl's default assumption for
+nonterminals found in gram.y.  If the result should be of some other
+type, make an entry in parse.pl's %replace_types table.  %replace_types
 can also be used to suppress output of a nonterminal's rules
 altogether (in which case ecpg.trailer had better provide replacement
 rules, since the nonterminal will still be referred to elsewhere).
--- a/src/interfaces/ecpg/preproc/ecpg.addons
+++ b/src/interfaces/ecpg/preproc/ecpg.addons
@@ -3,36 +3,35 @@ ECPG: stmtClosePortalStmt block
 	{
 		if (INFORMIX_MODE)
 		{
-			if (pg_strcasecmp($1 + strlen("close "), "database") == 0)
+			if (pg_strcasecmp(@1 + strlen("close "), "database") == 0)
 			{
 				if (connection)
 					mmerror(PARSE_ERROR, ET_ERROR, "AT option not allowed in CLOSE DATABASE statement");

 				fprintf(base_yyout, "{ ECPGdisconnect(__LINE__, \"CURRENT\");");
 				whenever_action(2);
-				free($1);
 				break;
 			}
 		}

-		output_statement($1, 0, ECPGst_normal);
+		output_statement(@1, 0, ECPGst_normal);
 	}
 ECPG: stmtDeallocateStmt block
 	{
-		output_deallocate_prepare_statement($1);
+		output_deallocate_prepare_statement(@1);
 	}
 ECPG: stmtDeclareCursorStmt block
 	{
-		output_simple_statement($1, (strncmp($1, "ECPGset_var", strlen("ECPGset_var")) == 0) ? 4 : 0);
+		output_simple_statement(@1, (strncmp(@1, "ECPGset_var", strlen("ECPGset_var")) == 0) ? 4 : 0);
 	}
 ECPG: stmtDiscardStmt block
 ECPG: stmtFetchStmt block
-	{ output_statement($1, 1, ECPGst_normal); }
+	{ output_statement(@1, 1, ECPGst_normal); }
 ECPG: stmtDeleteStmt block
 ECPG: stmtInsertStmt block
 ECPG: stmtSelectStmt block
 ECPG: stmtUpdateStmt block
-	{ output_statement($1, 1, ECPGst_prepnormal); }
+	{ output_statement(@1, 1, ECPGst_prepnormal); }
 ECPG: stmtExecuteStmt block
 	{
 		check_declared_list($1.name);
@@ -94,50 +93,45 @@ ECPG: stmtPrepareStmt block
 	}
 ECPG: stmtTransactionStmt block
 	{
-		fprintf(base_yyout, "{ ECPGtrans(__LINE__, %s, \"%s\");", connection ? connection : "NULL", $1);
+		fprintf(base_yyout, "{ ECPGtrans(__LINE__, %s, \"%s\");", connection ? connection : "NULL", @1);
 		whenever_action(2);
-		free($1);
 	}
 ECPG: toplevel_stmtTransactionStmtLegacy block
 	{
-		fprintf(base_yyout, "{ ECPGtrans(__LINE__, %s, \"%s\");", connection ? connection : "NULL", $1);
+		fprintf(base_yyout, "{ ECPGtrans(__LINE__, %s, \"%s\");", connection ? connection : "NULL", @1);
 		whenever_action(2);
-		free($1);
 	}
 ECPG: stmtViewStmt rule
 	| ECPGAllocateDescr
 	{
-		fprintf(base_yyout, "ECPGallocate_desc(__LINE__, %s);", $1);
+		fprintf(base_yyout, "ECPGallocate_desc(__LINE__, %s);", @1);
 		whenever_action(0);
-		free($1);
 	}
 	| ECPGConnect
 	{
 		if (connection)
 			mmerror(PARSE_ERROR, ET_ERROR, "AT option not allowed in CONNECT statement");

-		fprintf(base_yyout, "{ ECPGconnect(__LINE__, %d, %s, %d); ", compat, $1, autocommit);
+		fprintf(base_yyout, "{ ECPGconnect(__LINE__, %d, %s, %d); ", compat, @1, autocommit);
 		reset_variables();
 		whenever_action(2);
-		free($1);
 	}
 	| ECPGDeclareStmt
 	{
-		output_simple_statement($1, 0);
+		output_simple_statement(@1, 0);
 	}
 	| ECPGCursorStmt
 	{
-		output_simple_statement($1, (strncmp($1, "ECPGset_var", strlen("ECPGset_var")) == 0) ? 4 : 0);
+		output_simple_statement(@1, (strncmp(@1, "ECPGset_var", strlen("ECPGset_var")) == 0) ? 4 : 0);
 	}
 	| ECPGDeallocateDescr
 	{
-		fprintf(base_yyout, "ECPGdeallocate_desc(__LINE__, %s);", $1);
+		fprintf(base_yyout, "ECPGdeallocate_desc(__LINE__, %s);", @1);
 		whenever_action(0);
-		free($1);
 	}
 	| ECPGDeclare
 	{
-		output_simple_statement($1, 0);
+		output_simple_statement(@1, 0);
 	}
 	| ECPGDescribe
 	{
@@ -157,27 +151,25 @@ ECPG: stmtViewStmt rule
 			mmerror(PARSE_ERROR, ET_ERROR, "AT option not allowed in DISCONNECT statement");

 		fprintf(base_yyout, "{ ECPGdisconnect(__LINE__, %s);",
-				$1 ? $1 : "\"CURRENT\"");
+				@1 ? @1 : "\"CURRENT\"");
 		whenever_action(2);
-		free($1);
 	}
 	| ECPGExecuteImmediateStmt
 	{
-		output_statement($1, 0, ECPGst_exec_immediate);
+		output_statement(@1, 0, ECPGst_exec_immediate);
 	}
 	| ECPGFree
 	{
 		const char *con = connection ? connection : "NULL";

-		if (strcmp($1, "all") == 0)
+		if (strcmp(@1, "all") == 0)
 			fprintf(base_yyout, "{ ECPGdeallocate_all(__LINE__, %d, %s);", compat, con);
-		else if ($1[0] == ':')
-			fprintf(base_yyout, "{ ECPGdeallocate(__LINE__, %d, %s, %s);", compat, con, $1 + 1);
+		else if (@1[0] == ':')
+			fprintf(base_yyout, "{ ECPGdeallocate(__LINE__, %d, %s, %s);", compat, con, @1 + 1);
 		else
-			fprintf(base_yyout, "{ ECPGdeallocate(__LINE__, %d, %s, \"%s\");", compat, con, $1);
+			fprintf(base_yyout, "{ ECPGdeallocate(__LINE__, %d, %s, \"%s\");", compat, con, @1);

 		whenever_action(2);
-		free($1);
 	}
 	| ECPGGetDescriptor
 	{
@@ -188,15 +180,14 @@ ECPG: stmtViewStmt rule
 	}
 	| ECPGGetDescriptorHeader
 	{
-		lookup_descriptor($1, connection);
-		output_get_descr_header($1);
-		free($1);
+		lookup_descriptor(@1, connection);
+		output_get_descr_header(@1);
 	}
 	| ECPGOpen
 	{
 		struct cursor *ptr;

-		if ((ptr = add_additional_variables($1, true)) != NULL)
+		if ((ptr = add_additional_variables(@1, true)) != NULL)
 		{
 			connection = ptr->connection ? mm_strdup(ptr->connection) : NULL;
 			output_statement(mm_strdup(ptr->command), 0, ECPGst_normal);
@@ -205,18 +196,16 @@ ECPG: stmtViewStmt rule
 	}
 	| ECPGSetAutocommit
 	{
-		fprintf(base_yyout, "{ ECPGsetcommit(__LINE__, \"%s\", %s);", $1, connection ? connection : "NULL");
+		fprintf(base_yyout, "{ ECPGsetcommit(__LINE__, \"%s\", %s);", @1, connection ? connection : "NULL");
 		whenever_action(2);
-		free($1);
 	}
 	| ECPGSetConnection
 	{
 		if (connection)
 			mmerror(PARSE_ERROR, ET_ERROR, "AT option not allowed in SET CONNECTION statement");

-		fprintf(base_yyout, "{ ECPGsetconn(__LINE__, %s);", $1);
+		fprintf(base_yyout, "{ ECPGsetconn(__LINE__, %s);", @1);
 		whenever_action(2);
-		free($1);
 	}
 	| ECPGSetDescriptor
 	{
@@ -227,17 +216,15 @@ ECPG: stmtViewStmt rule
 	}
 	| ECPGSetDescriptorHeader
 	{
-		lookup_descriptor($1, connection);
-		output_set_descr_header($1);
-		free($1);
+		lookup_descriptor(@1, connection);
+		output_set_descr_header(@1);
 	}
 	| ECPGTypedef
 	{
 		if (connection)
 			mmerror(PARSE_ERROR, ET_ERROR, "AT option not allowed in TYPE statement");

-		fprintf(base_yyout, "%s", $1);
-		free($1);
+		fprintf(base_yyout, "%s", @1);
 		output_line_number();
 	}
 	| ECPGVar
@@ -245,180 +232,169 @@ ECPG: stmtViewStmt rule
 		if (connection)
 			mmerror(PARSE_ERROR, ET_ERROR, "AT option not allowed in VAR statement");

-		output_simple_statement($1, 0);
+		output_simple_statement(@1, 0);
 	}
 	| ECPGWhenever
 	{
 		if (connection)
 			mmerror(PARSE_ERROR, ET_ERROR, "AT option not allowed in WHENEVER statement");

-		output_simple_statement($1, 0);
+		output_simple_statement(@1, 0);
 	}
 ECPG: where_or_current_clauseWHERECURRENT_POFcursor_name block
 	{
-		char	   *cursor_marker = $4[0] == ':' ? mm_strdup("$0") : $4;
+		char	   *cursor_marker = @4[0] == ':' ? mm_strdup("$0") : @4;

-		$$ = cat_str(2, mm_strdup("where current of"), cursor_marker);
+		@$ = cat_str(2, mm_strdup("where current of"), cursor_marker);
 	}
 ECPG: CopyStmtCOPYopt_binaryqualified_nameopt_column_listcopy_fromopt_programcopy_file_namecopy_delimiteropt_withcopy_optionswhere_clause addon
-		if (strcmp($6, "from") == 0 &&
-			(strcmp($7, "stdin") == 0 || strcmp($7, "stdout") == 0))
+		if (strcmp(@6, "from") == 0 &&
+			(strcmp(@7, "stdin") == 0 || strcmp(@7, "stdout") == 0))
 			mmerror(PARSE_ERROR, ET_WARNING, "COPY FROM STDIN is not implemented");
 ECPG: var_valueNumericOnly addon
-		if ($1[0] == '$')
-		{
-			free($1);
-			$1 = mm_strdup("$0");
-		}
+		if (@1[0] == '$')
+			@$ = mm_strdup("$0");
 ECPG: fetch_argscursor_name addon
-		struct cursor *ptr = add_additional_variables($1, false);
+		struct cursor *ptr = add_additional_variables(@1, false);

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);
-		if ($1[0] == ':')
-		{
-			free($1);
-			$1 = mm_strdup("$0");
-		}
+		if (@1[0] == ':')
+			@$ = mm_strdup("$0");
 ECPG: fetch_argsfrom_incursor_name addon
-		struct cursor *ptr = add_additional_variables($2, false);
+		struct cursor *ptr = add_additional_variables(@2, false);

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);
-		if ($2[0] == ':')
-		{
-			free($2);
-			$2 = mm_strdup("$0");
-		}
+		if (@2[0] == ':')
+			@$ = cat2_str(mm_strdup(@1), mm_strdup("$0"));
 ECPG: fetch_argsNEXTopt_from_incursor_name addon
 ECPG: fetch_argsPRIORopt_from_incursor_name addon
 ECPG: fetch_argsFIRST_Popt_from_incursor_name addon
 ECPG: fetch_argsLAST_Popt_from_incursor_name addon
 ECPG: fetch_argsALLopt_from_incursor_name addon
-		struct cursor *ptr = add_additional_variables($3, false);
+		struct cursor *ptr = add_additional_variables(@3, false);

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);
-		if ($3[0] == ':')
-		{
-			free($3);
-			$3 = mm_strdup("$0");
-		}
+		if (@3[0] == ':')
+			@$ = cat_str(3, mm_strdup(@1), mm_strdup(@2), mm_strdup("$0"));
 ECPG: fetch_argsSignedIconstopt_from_incursor_name addon
-		struct cursor *ptr = add_additional_variables($3, false);
+		struct cursor *ptr = add_additional_variables(@3, false);
+		bool	replace = false;

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);
-		if ($3[0] == ':')
+		if (@3[0] == ':')
 		{
-			free($3);
-			$3 = mm_strdup("$0");
+			@3 = mm_strdup("$0");
+			replace = true;
 		}
-		if ($1[0] == '$')
+		if (@1[0] == '$')
 		{
-			free($1);
-			$1 = mm_strdup("$0");
+			@1 = mm_strdup("$0");
+			replace = true;
 		}
+		if (replace)
+			@$ = cat_str(3, mm_strdup(@1), mm_strdup(@2), mm_strdup(@3));
 ECPG: fetch_argsFORWARDALLopt_from_incursor_name addon
 ECPG: fetch_argsBACKWARDALLopt_from_incursor_name addon
-		struct cursor *ptr = add_additional_variables($4, false);
+		struct cursor *ptr = add_additional_variables(@4, false);

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);
-		if ($4[0] == ':')
-		{
-			free($4);
-			$4 = mm_strdup("$0");
-		}
+		if (@4[0] == ':')
+			@$ = cat_str(4, mm_strdup(@1), mm_strdup(@2), mm_strdup(@3), mm_strdup("$0"));
 ECPG: fetch_argsABSOLUTE_PSignedIconstopt_from_incursor_name addon
 ECPG: fetch_argsRELATIVE_PSignedIconstopt_from_incursor_name addon
 ECPG: fetch_argsFORWARDSignedIconstopt_from_incursor_name addon
 ECPG: fetch_argsBACKWARDSignedIconstopt_from_incursor_name addon
-		struct cursor *ptr = add_additional_variables($4, false);
+		struct cursor *ptr = add_additional_variables(@4, false);
+		bool	replace = false;

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);
-		if ($4[0] == ':')
+		if (@4[0] == ':')
 		{
-			free($4);
-			$4 = mm_strdup("$0");
+			@4 = mm_strdup("$0");
+			replace = true;
 		}
-		if ($2[0] == '$')
+		if (@2[0] == '$')
 		{
-			free($2);
-			$2 = mm_strdup("$0");
+			@2 = mm_strdup("$0");
+			replace = true;
 		}
-ECPG: cursor_namename rule
+		if (replace)
+			@$ = cat_str(4, mm_strdup(@1), mm_strdup(@2), mm_strdup(@3), mm_strdup(@4));
+ECPG: cursor_namename block
 	| char_civar
 	{
-		char	   *curname = mm_alloc(strlen($1) + 2);
+		char	   *curname = mm_alloc(strlen(@1) + 2);

-		sprintf(curname, ":%s", $1);
-		free($1);
-		$1 = curname;
-		$$ = $1;
+		sprintf(curname, ":%s", @1);
+		@$ = curname;
 	}
 ECPG: ExplainableStmtExecuteStmt block
 	{
-		$$ = $1.name;
+		@$ = $1.name;
 	}
 ECPG: PrepareStmtPREPAREprepared_nameprep_type_clauseASPreparableStmt block
 	{
-		$$.name = $2;
-		$$.type = $3;
-		$$.stmt = $5;
+		$$.name = @2;
+		$$.type = @3;
+		$$.stmt = @5;
 	}
 	| PREPARE prepared_name FROM execstring
 	{
-		$$.name = $2;
+		$$.name = @2;
 		$$.type = NULL;
-		$$.stmt = $4;
+		$$.stmt = @4;
 	}
 ECPG: ExecuteStmtEXECUTEprepared_nameexecute_param_clauseexecute_rest block
 	{
-		$$.name = $2;
-		$$.type = $3;
+		$$.name = @2;
+		$$.type = @3;
 	}
 ECPG: ExecuteStmtCREATEOptTempTABLEcreate_as_targetASEXECUTEprepared_nameexecute_param_clauseopt_with_dataexecute_rest block
 	{
-		$$.name = cat_str(8, mm_strdup("create"), $2, mm_strdup("table"), $4, mm_strdup("as execute"), $7, $8, $9);
+		$$.name = @$;
 	}
 ECPG: ExecuteStmtCREATEOptTempTABLEIF_PNOTEXISTScreate_as_targetASEXECUTEprepared_nameexecute_param_clauseopt_with_dataexecute_rest block
 	{
-		$$.name = cat_str(8, mm_strdup("create"), $2, mm_strdup("table if not exists"), $7, mm_strdup("as execute"), $10, $11, $12);
+		$$.name = @$;
 	}
 ECPG: DeclareCursorStmtDECLAREcursor_namecursor_optionsCURSORopt_holdFORSelectStmt block
 	{
 		struct cursor *ptr,
 				   *this;
-		char	   *cursor_marker = $2[0] == ':' ? mm_strdup("$0") : mm_strdup($2);
+		char	   *cursor_marker = @2[0] == ':' ? mm_strdup("$0") : mm_strdup(@2);
 		char	   *comment,
 				   *c1,
 				   *c2;
-		int			(*strcmp_fn) (const char *, const char *) = (($2[0] == ':' || $2[0] == '"') ? strcmp : pg_strcasecmp);
+		int			(*strcmp_fn) (const char *, const char *) = ((@2[0] == ':' || @2[0] == '"') ? strcmp : pg_strcasecmp);

-		if (INFORMIX_MODE && pg_strcasecmp($2, "database") == 0)
+		if (INFORMIX_MODE && pg_strcasecmp(@2, "database") == 0)
 			mmfatal(PARSE_ERROR, "\"database\" cannot be used as cursor name in INFORMIX mode");

 		for (ptr = cur; ptr != NULL; ptr = ptr->next)
 		{
-			if (strcmp_fn($2, ptr->name) == 0)
+			if (strcmp_fn(@2, ptr->name) == 0)
 			{
-				if ($2[0] == ':')
-					mmerror(PARSE_ERROR, ET_ERROR, "using variable \"%s\" in different declare statements is not supported", $2 + 1);
+				if (@2[0] == ':')
+					mmerror(PARSE_ERROR, ET_ERROR, "using variable \"%s\" in different declare statements is not supported", @2 + 1);
 				else
-					mmerror(PARSE_ERROR, ET_ERROR, "cursor \"%s\" is already defined", $2);
+					mmerror(PARSE_ERROR, ET_ERROR, "cursor \"%s\" is already defined", @2);
 			}
 		}

 		this = (struct cursor *) mm_alloc(sizeof(struct cursor));

 		this->next = cur;
-		this->name = $2;
+		this->name = mm_strdup(@2);
 		this->function = (current_function ? mm_strdup(current_function) : NULL);
 		this->connection = connection ? mm_strdup(connection) : NULL;
 		this->opened = false;
-		this->command = cat_str(7, mm_strdup("declare"), cursor_marker, $3, mm_strdup("cursor"), $5, mm_strdup("for"), $7);
+		this->command = cat_str(7, mm_strdup("declare"), cursor_marker, @3, mm_strdup("cursor"), @5, mm_strdup("for"), @7);
 		this->argsinsert = argsinsert;
 		this->argsinsert_oos = NULL;
 		this->argsresult = argsresult;
@@ -435,47 +411,47 @@ ECPG: DeclareCursorStmtDECLAREcursor_namecursor_optionsCURSORopt_holdFORSelectSt
 		}
 		comment = cat_str(3, mm_strdup("/*"), c1, mm_strdup("*/"));

-		$$ = cat2_str(adjust_outofscope_cursor_vars(this), comment);
+		@$ = cat2_str(adjust_outofscope_cursor_vars(this), comment);
 	}
 ECPG: ClosePortalStmtCLOSEcursor_name block
 	{
-		char	   *cursor_marker = $2[0] == ':' ? mm_strdup("$0") : $2;
+		char	   *cursor_marker = @2[0] == ':' ? mm_strdup("$0") : @2;
 		struct cursor *ptr = NULL;

 		for (ptr = cur; ptr != NULL; ptr = ptr->next)
 		{
-			if (strcmp($2, ptr->name) == 0)
+			if (strcmp(@2, ptr->name) == 0)
 			{
 				if (ptr->connection)
 					connection = mm_strdup(ptr->connection);
 				break;
 			}
 		}
-		$$ = cat2_str(mm_strdup("close"), cursor_marker);
+		@$ = cat2_str(mm_strdup("close"), cursor_marker);
 	}
 ECPG: opt_hold block
 	{
 		if (compat == ECPG_COMPAT_INFORMIX_SE && autocommit)
-			$$ = mm_strdup("with hold");
+			@$ = mm_strdup("with hold");
 		else
-			$$ = EMPTY;
+			@$ = EMPTY;
 	}
 ECPG: into_clauseINTOOptTempTableName block
 	{
 		FoundInto = 1;
-		$$ = cat2_str(mm_strdup("into"), $2);
+		@$ = cat2_str(mm_strdup("into"), @2);
 	}
 	| ecpg_into
 	{
-		$$ = EMPTY;
+		@$ = EMPTY;
 	}
 ECPG: TypenameSimpleTypenameopt_array_bounds block
 	{
-		$$ = cat2_str($1, $2.str);
+		@$ = cat2_str(@1, $2.str);
 	}
 ECPG: TypenameSETOFSimpleTypenameopt_array_bounds block
 	{
-		$$ = cat_str(3, mm_strdup("setof"), $2, $3.str);
+		@$ = cat_str(3, mm_strdup("setof"), @2, $3.str);
 	}
 ECPG: opt_array_boundsopt_array_bounds'['']' block
 	{
@@ -492,10 +468,10 @@ ECPG: opt_array_boundsopt_array_bounds'['']' block
 		$$.index1 = $1.index1;
 		$$.index2 = $1.index2;
 		if (strcmp($1.index1, "-1") == 0)
-			$$.index1 = mm_strdup($3);
+			$$.index1 = mm_strdup(@3);
 		else if (strcmp($1.index2, "-1") == 0)
-			$$.index2 = mm_strdup($3);
-		$$.str = cat_str(4, $1.str, mm_strdup("["), $3, mm_strdup("]"));
+			$$.index2 = mm_strdup(@3);
+		$$.str = cat_str(4, $1.str, mm_strdup("["), @3, mm_strdup("]"));
 	}
 ECPG: opt_array_bounds block
 	{
@@ -505,108 +481,100 @@ ECPG: opt_array_bounds block
 	}
 ECPG: IconstICONST block
 	{
-		$$ = make_name();
+		@$ = make_name();
 	}
 ECPG: AexprConstNULL_P rule
-	| civar							{ $$ = $1; }
-	| civarind						{ $$ = $1; }
+	| civar
+	| civarind
 ECPG: VariableShowStmtSHOWALL block
 	{
 		mmerror(PARSE_ERROR, ET_ERROR, "SHOW ALL is not implemented");
-		$$ = EMPTY;
 	}
 ECPG: FetchStmtMOVEfetch_args rule
 	| FETCH fetch_args ecpg_fetch_into
-	{
-		$$ = cat2_str(mm_strdup("fetch"), $2);
-	}
 	| FETCH FORWARD cursor_name opt_ecpg_fetch_into
 	{
-		char	   *cursor_marker = $3[0] == ':' ? mm_strdup("$0") : $3;
-		struct cursor *ptr = add_additional_variables($3, false);
+		char	   *cursor_marker = @3[0] == ':' ? mm_strdup("$0") : @3;
+		struct cursor *ptr = add_additional_variables(@3, false);

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);

-		$$ = cat_str(2, mm_strdup("fetch forward"), cursor_marker);
+		@$ = cat_str(2, mm_strdup("fetch forward"), cursor_marker);
 	}
 	| FETCH FORWARD from_in cursor_name opt_ecpg_fetch_into
 	{
-		char	   *cursor_marker = $4[0] == ':' ? mm_strdup("$0") : $4;
-		struct cursor *ptr = add_additional_variables($4, false);
+		char	   *cursor_marker = @4[0] == ':' ? mm_strdup("$0") : @4;
+		struct cursor *ptr = add_additional_variables(@4, false);

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);

-		$$ = cat_str(2, mm_strdup("fetch forward from"), cursor_marker);
+		@$ = cat_str(2, mm_strdup("fetch forward from"), cursor_marker);
 	}
 	| FETCH BACKWARD cursor_name opt_ecpg_fetch_into
 	{
-		char	   *cursor_marker = $3[0] == ':' ? mm_strdup("$0") : $3;
-		struct cursor *ptr = add_additional_variables($3, false);
+		char	   *cursor_marker = @3[0] == ':' ? mm_strdup("$0") : @3;
+		struct cursor *ptr = add_additional_variables(@3, false);

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);

-		$$ = cat_str(2, mm_strdup("fetch backward"), cursor_marker);
+		@$ = cat_str(2, mm_strdup("fetch backward"), cursor_marker);
 	}
 	| FETCH BACKWARD from_in cursor_name opt_ecpg_fetch_into
 	{
-		char	   *cursor_marker = $4[0] == ':' ? mm_strdup("$0") : $4;
-		struct cursor *ptr = add_additional_variables($4, false);
+		char	   *cursor_marker = @4[0] == ':' ? mm_strdup("$0") : @4;
+		struct cursor *ptr = add_additional_variables(@4, false);

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);

-		$$ = cat_str(2, mm_strdup("fetch backward from"), cursor_marker);
+		@$ = cat_str(2, mm_strdup("fetch backward from"), cursor_marker);
 	}
 	| MOVE FORWARD cursor_name
 	{
-		char	   *cursor_marker = $3[0] == ':' ? mm_strdup("$0") : $3;
-		struct cursor *ptr = add_additional_variables($3, false);
+		char	   *cursor_marker = @3[0] == ':' ? mm_strdup("$0") : @3;
+		struct cursor *ptr = add_additional_variables(@3, false);

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);

-		$$ = cat_str(2, mm_strdup("move forward"), cursor_marker);
+		@$ = cat_str(2, mm_strdup("move forward"), cursor_marker);
 	}
 	| MOVE FORWARD from_in cursor_name
 	{
-		char	   *cursor_marker = $4[0] == ':' ? mm_strdup("$0") : $4;
-		struct cursor *ptr = add_additional_variables($4, false);
+		char	   *cursor_marker = @4[0] == ':' ? mm_strdup("$0") : @4;
+		struct cursor *ptr = add_additional_variables(@4, false);

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);

-		$$ = cat_str(2, mm_strdup("move forward from"), cursor_marker);
+		@$ = cat_str(2, mm_strdup("move forward from"), cursor_marker);
 	}
 	| MOVE BACKWARD cursor_name
 	{
-		char	   *cursor_marker = $3[0] == ':' ? mm_strdup("$0") : $3;
-		struct cursor *ptr = add_additional_variables($3, false);
+		char	   *cursor_marker = @3[0] == ':' ? mm_strdup("$0") : @3;
+		struct cursor *ptr = add_additional_variables(@3, false);

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);

-		$$ = cat_str(2, mm_strdup("move backward"), cursor_marker);
+		@$ = cat_str(2, mm_strdup("move backward"), cursor_marker);
 	}
 	| MOVE BACKWARD from_in cursor_name
 	{
-		char	   *cursor_marker = $4[0] == ':' ? mm_strdup("$0") : $4;
-		struct cursor *ptr = add_additional_variables($4, false);
+		char	   *cursor_marker = @4[0] == ':' ? mm_strdup("$0") : @4;
+		struct cursor *ptr = add_additional_variables(@4, false);

 		if (ptr->connection)
 			connection = mm_strdup(ptr->connection);

-		$$ = cat_str(2, mm_strdup("move backward from"), cursor_marker);
+		@$ = cat_str(2, mm_strdup("move backward from"), cursor_marker);
 	}
 ECPG: limit_clauseLIMITselect_limit_value','select_offset_value block
 	{
 		mmerror(PARSE_ERROR, ET_WARNING, "no longer supported LIMIT #,# syntax passed to server");
-		$$ = cat_str(4, mm_strdup("limit"), $2, mm_strdup(","), $4);
 	}
 ECPG: SignedIconstIconst rule
 	| civar
-	{
-		$$ = $1;
-	}
--- a/src/interfaces/ecpg/preproc/ecpg.header
+++ b/src/interfaces/ecpg/preproc/ecpg.header
@@ -13,14 +13,6 @@
 extern int base_yychar;
 extern int base_yynerrs;

-/* Location tracking support --- simpler than bison's default */
-#define YYLLOC_DEFAULT(Current, Rhs, N) \
-	do { \
-		if (N)						\
-			(Current) = (Rhs)[1];	\
-		else						\
-			(Current) = (Rhs)[0];	\
-	} while (0)

 /*
 * The %name-prefix option below will make bison call base_yylex, but we
@@ -200,6 +192,61 @@ make3_str(char *str1, char *str2, char *str3)
 	return res_str;
 }

+/*
+ * "Location tracking" support.  We commandeer Bison's location tracking
+ * mechanism to manage the output string for productions that ordinarily would
+ * return a <str> result.  This allows the majority of those productions to
+ * have default semantic actions, reducing the size of the parser, and also
+ * greatly reducing its compilation time on some versions of clang.
+ *
+ * To do this, we make YYLTYPE be a pointer to a malloc'd string, and then
+ * merge the location strings of the input symbols in YYLLOC_DEFAULT's
+ * computation.  Productions that are okay with the standard merge need not
+ * do anything more; otherwise, they can override it by assigning to @$.
+ */
+#define YYLLOC_DEFAULT(Current, Rhs, N) yylloc_default(&(Current), Rhs, N)
+
+static void
+yylloc_default(YYLTYPE *target, YYLTYPE *rhs, int N)
+{
+	if (N > 1)
+	{
+		/* Concatenate non-empty inputs with one space between them */
+		char	   *result,
+				   *ptr;
+		size_t		needed = 0;
+
+		for (int i = 1; i <= N; i++)	/* pass 1: measure; inputs are rhs[1..N] */
+		{
+			size_t		thislen = strlen(rhs[i]);	/* NOTE(review): assumes rhs[i] is never NULL -- confirm */
+
+			if (needed > 0 && thislen > 0)
+				needed++;	/* room for the separating space */
+			needed += thislen;
+		}
+		result = (char *) mm_alloc(needed + 1);	/* +1 for the terminating NUL */
+		ptr = result;
+		for (int i = 1; i <= N; i++)	/* pass 2: copy, using the same separator rule as pass 1 */
+		{
+			size_t		thislen = strlen(rhs[i]);
+
+			if (ptr > result && thislen > 0)
+				*ptr++ = ' ';	/* only once something has already been written */
+			memcpy(ptr, rhs[i], thislen);
+			ptr += thislen;
+		}
+		*ptr = '\0';
+		*target = result;
+	}
+	else if (N == 1)
+	{
+		/* Just re-use the single input */
+		*target = rhs[1];	/* pointer is shared with the input; no copy is made */
+	}
+	else
+		*target = EMPTY;	/* N < 1: no inputs to merge */
+}
+
 /* and the rest */
 static char *
 make_name(void)
--- a/src/interfaces/ecpg/preproc/ecpg.trailer
+++ b/src/interfaces/ecpg/preproc/ecpg.trailer
--- a/src/interfaces/ecpg/preproc/ecpg.type
+++ b/src/interfaces/ecpg/preproc/ecpg.type
@@ -1,131 +1,4 @@
 /* src/interfaces/ecpg/preproc/ecpg.type */
-%type <str> ECPGAllocateDescr
-%type <str> ECPGCKeywords
-%type <str> ECPGColId
-%type <str> ECPGColLabel
-%type <str> ECPGConnect
-%type <str> ECPGCursorStmt
-%type <str> ECPGDeallocateDescr
-%type <str> ECPGDeclaration
-%type <str> ECPGDeclare
-%type <str> ECPGDeclareStmt
-%type <str> ECPGDisconnect
-%type <str> ECPGExecuteImmediateStmt
-%type <str> ECPGFree
-%type <str> ECPGGetDescHeaderItem
-%type <str> ECPGGetDescItem
-%type <str> ECPGGetDescriptorHeader
-%type <str> ECPGKeywords
-%type <str> ECPGKeywords_rest
-%type <str> ECPGKeywords_vanames
-%type <str> ECPGOpen
-%type <str> ECPGSetAutocommit
-%type <str> ECPGSetConnection
-%type <str> ECPGSetDescHeaderItem
-%type <str> ECPGSetDescItem
-%type <str> ECPGSetDescriptorHeader
-%type <str> ECPGTypeName
-%type <str> ECPGTypedef
-%type <str> ECPGVar
-%type <str> ECPGVarDeclaration
-%type <str> ECPGWhenever
-%type <str> ECPGunreserved_interval
-%type <str> UsingConst
-%type <str> UsingValue
-%type <str> all_unreserved_keyword
-%type <str> c_anything
-%type <str> c_args
-%type <str> c_list
-%type <str> c_stuff
-%type <str> c_stuff_item
-%type <str> c_term
-%type <str> c_thing
-%type <str> char_variable
-%type <str> char_civar
-%type <str> civar
-%type <str> civarind
-%type <str> ColId
-%type <str> ColLabel
-%type <str> connect_options
-%type <str> connection_object
-%type <str> connection_target
-%type <str> coutputvariable
-%type <str> cvariable
-%type <str> db_prefix
-%type <str> CreateAsStmt
-%type <str> DeallocateStmt
-%type <str> dis_name
-%type <str> ecpg_bconst
-%type <str> ecpg_fconst
-%type <str> ecpg_ident
-%type <str> ecpg_interval
-%type <str> ecpg_into
-%type <str> ecpg_fetch_into
-%type <str> ecpg_param
-%type <str> ecpg_sconst
-%type <str> ecpg_using
-%type <str> ecpg_xconst
-%type <str> enum_definition
-%type <str> enum_type
-%type <str> execstring
-%type <str> execute_rest
-%type <str> indicator
-%type <str> into_descriptor
-%type <str> into_sqlda
-%type <str> Iresult
-%type <str> on_off
-%type <str> opt_bit_field
-%type <str> opt_connection_name
-%type <str> opt_database_name
-%type <str> opt_ecpg_into
-%type <str> opt_ecpg_fetch_into
-%type <str> opt_ecpg_using
-%type <str> opt_initializer
-%type <str> opt_options
-%type <str> opt_output
-%type <str> opt_pointer
-%type <str> opt_port
-%type <str> opt_reference
-%type <str> opt_scale
-%type <str> opt_server
-%type <str> opt_user
-%type <str> opt_opt_value
-%type <str> ora_user
-%type <str> precision
-%type <str> prepared_name
-%type <str> quoted_ident_stringvar
-%type <str> s_struct_union
-%type <str> server
-%type <str> server_name
-%type <str> single_vt_declaration
-%type <str> storage_clause
-%type <str> storage_declaration
-%type <str> storage_modifier
-%type <str> struct_union_type
-%type <str> struct_union_type_with_symbol
-%type <str> symbol
-%type <str> type_declaration
-%type <str> type_function_name
-%type <str> user_name
-%type <str> using_descriptor
-%type <str> var_declaration
-%type <str> var_type_declarations
-%type <str> variable
-%type <str> variable_declarations
-%type <str> variable_list
-%type <str> vt_declarations
-
-%type <str> Op
-%type <str> IntConstVar
-%type <str> AllConstVar
-%type <str> CSTRING
-%type <str> CPP_LINE
-%type <str> CVARIABLE
-%type <str> BCONST
-%type <str> SCONST
-%type <str> XCONST
-%type <str> IDENT
-
 %type  <struct_union> s_struct_union_symbol

 %type  <descriptor> ECPGGetDescriptor
--- a/src/interfaces/ecpg/preproc/output.c
+++ b/src/interfaces/ecpg/preproc/output.c
@@ -4,7 +4,7 @@

 #include "preproc_extern.h"

-static void output_escaped_str(char *str, bool quoted);
+static void output_escaped_str(const char *str, bool quoted);

 void
 output_line_number(void)
@@ -16,13 +16,12 @@ output_line_number(void)
 }

 void
-output_simple_statement(char *stmt, int whenever_mode)
+output_simple_statement(const char *stmt, int whenever_mode)
 {
 	output_escaped_str(stmt, false);
 	if (whenever_mode)
 		whenever_action(whenever_mode);
 	output_line_number();
-	free(stmt);
 }


@@ -133,7 +132,7 @@ static char *ecpg_statement_type_name[] = {
 };

 void
-output_statement(char *stmt, int whenever_mode, enum ECPG_statement_type st)
+output_statement(const char *stmt, int whenever_mode, enum ECPG_statement_type st)
 {
 	fprintf(base_yyout, "{ ECPGdo(__LINE__, %d, %d, %s, %d, ", compat, force_indicator, connection ? connection : "NULL", questionmarks);

@@ -163,11 +162,10 @@ output_statement(char *stmt, int whenever_mode, enum ECPG_statement_type st)
 	reset_variables();

 	whenever_action(whenever_mode | 2);
-	free(stmt);
 }

 void
-output_prepare_statement(char *name, char *stmt)
+output_prepare_statement(const char *name, const char *stmt)
 {
 	fprintf(base_yyout, "{ ECPGprepare(__LINE__, %s, %d, ", connection ? connection : "NULL", questionmarks);
 	output_escaped_str(name, true);
@@ -175,11 +173,10 @@ output_prepare_statement(char *name, char *stmt)
 	output_escaped_str(stmt, true);
 	fputs(");", base_yyout);
 	whenever_action(2);
-	free(name);
 }

 void
-output_deallocate_prepare_statement(char *name)
+output_deallocate_prepare_statement(const char *name)
 {
 	const char *con = connection ? connection : "NULL";

@@ -193,11 +190,10 @@ output_deallocate_prepare_statement(char *name)
 		fprintf(base_yyout, "{ ECPGdeallocate_all(__LINE__, %d, %s);", compat, con);

 	whenever_action(2);
-	free(name);
 }

 static void
-output_escaped_str(char *str, bool quoted)
+output_escaped_str(const char *str, bool quoted)
 {
 	int			i = 0;
 	int			len = strlen(str);
--- a/src/interfaces/ecpg/preproc/parse.pl
+++ b/src/interfaces/ecpg/preproc/parse.pl
@@ -44,27 +44,10 @@ my %replace_token = (
 	'IDENT' => 'ecpg_ident',
 	'PARAM' => 'ecpg_param',);

-# Substitutions to apply to terminal token names to reconstruct the
-# literal form of the token.  (There is also a hard-wired substitution
-# rule that strips trailing '_P'.)
-my %replace_string = (
-	'FORMAT_LA' => 'format',
-	'NOT_LA' => 'not',
-	'NULLS_LA' => 'nulls',
-	'WITH_LA' => 'with',
-	'WITHOUT_LA' => 'without',
-	'TYPECAST' => '::',
-	'DOT_DOT' => '..',
-	'COLON_EQUALS' => ':=',
-	'EQUALS_GREATER' => '=>',
-	'LESS_EQUALS' => '<=',
-	'GREATER_EQUALS' => '>=',
-	'NOT_EQUALS' => '<>',);
-
-# This hash can provide a result type to override '<str>' for nonterminals
+# This hash can provide a result type to override "void" for nonterminals
 # that need that, or it can specify 'ignore' to cause us to skip the rule
-# for that nonterminal.  (In that case, ecpg.trailer had better provide
-# a substitute rule.)
+# for that nonterminal.  (In either case, ecpg.trailer had better provide
+# a substitute rule, since the default won't do.)
 my %replace_types = (
 	'PrepareStmt' => '<prep>',
 	'ExecuteStmt' => '<exec>',
@@ -175,11 +158,8 @@ my $non_term_id;
 # we plan to emit for the current rule.
 my $line = '';

-# @fields holds the items to be emitted in the token-concatenation action
-# for the current rule (assuming we emit one).  "$N" refers to the N'th
-# input token of the rule; anything else is a string to emit literally.
-# (We assume no such string can need to start with '$'.)
-my @fields;
+# count of tokens included in $line.
+my $line_count = 0;


 # Open parser / output file early, to raise errors early.
@@ -244,10 +224,6 @@ sub main
 			$has_if_command = 1 if /^\s*if/;
 		}

-		# We track %prec per-line, not per-rule, which is not quite right
-		# but there are no counterexamples in gram.y at present.
-		my $prec = 0;
-
 		# Make sure any braces are split into separate fields
 		s/{/ { /g;
 		s/}/ } /g;
@@ -296,7 +272,7 @@ sub main
 				}

 				# If it's "<something>", it's a type in a %token declaration,
-				# which we can just drop.
+				# which we should just drop so that the tokens have void type.
 				if (substr($a, 0, 1) eq '<')
 				{
 					next;
@@ -376,7 +352,7 @@ sub main
 				if ($copymode)
 				{
 					# Print the accumulated rule.
-					emit_rule(\@fields);
+					emit_rule();
 					add_to_buffer('rules', ";\n\n");
 				}
 				else
@@ -386,8 +362,8 @@ sub main
 				}

 				# Reset for the next rule.
-				@fields = ();
 				$line = '';
+				$line_count = 0;
 				$in_rule = 0;
 				$alt_count = 0;
 				$has_feature_not_supported = 0;
@@ -401,11 +377,10 @@ sub main
 				{
 					# Print the accumulated alternative.
 					# Increment $alt_count for each non-ignored alternative.
-					$alt_count += emit_rule(\@fields);
+					$alt_count += emit_rule();
 				}

 				# Reset for the next alternative.
-				@fields = ();
 				# Start the next line with '|' if we've printed at least one
 				# alternative.
 				if ($alt_count > 1)
@@ -416,6 +391,7 @@ sub main
 				{
 					$line = '';
 				}
+				$line_count = 0;
 				$has_feature_not_supported = 0;
 				$has_if_command = 0;
 				next;
@@ -444,13 +420,9 @@ sub main
 					$fieldIndexer++;
 				}

-				# Check for %replace_types override of nonterminal's type
-				if (not defined $replace_types{$non_term_id})
-				{
-					# By default, the type is <str>
-					$replace_types{$non_term_id} = '<str>';
-				}
-				elsif ($replace_types{$non_term_id} eq 'ignore')
+				# Check for %replace_types entry indicating to ignore it.
+				if (defined $replace_types{$non_term_id}
+					&& $replace_types{$non_term_id} eq 'ignore')
 				{
 					# We'll ignore this nonterminal and rule altogether.
 					$copymode = 0;
@@ -470,22 +442,26 @@ sub main
 					$stmt_mode = 0;
 				}

-				# Emit appropriate %type declaration for this nonterminal.
-				my $tstr =
-					'%type '
-				  . $replace_types{$non_term_id} . ' '
-				  . $non_term_id;
-				add_to_buffer('types', $tstr);
+				# Emit appropriate %type declaration for this nonterminal,
+				# if it has a type; otherwise omit that.
+				if (defined $replace_types{$non_term_id})
+				{
+					my $tstr =
+						'%type '
+					  . $replace_types{$non_term_id} . ' '
+					  . $non_term_id;
+					add_to_buffer('types', $tstr);
+				}

 				# Emit the target part of the rule.
 				# Note: the leading space is just to match
 				# the rather weird pre-v18 output logic.
-				$tstr = ' ' . $non_term_id . ':';
+				my $tstr = ' ' . $non_term_id . ':';
 				add_to_buffer('rules', $tstr);

-				# Prepare for reading the fields (tokens) of the rule.
+				# Prepare for reading the tokens of the rule.
 				$line = '';
-				@fields = ();
+				$line_count = 0;
 				die "unterminated rule at grammar line $.\n"
 				  if $in_rule;
 				$in_rule = 1;
@@ -496,48 +472,7 @@ sub main
 			{
 				# Not a nonterminal declaration, so just add it to $line.
 				$line = $line . ' ' . $arr[$fieldIndexer];
-			}
-
-			# %prec and whatever follows it should get added to $line,
-			# but not to @fields.
-			if ($arr[$fieldIndexer] eq '%prec')
-			{
-				$prec = 1;
-				next;
-			}
-
-			# Emit transformed version of token to @fields if appropriate.
-			if (   $copymode
-				&& !$prec
-				&& !$comment
-				&& $in_rule)
-			{
-				my $S = $arr[$fieldIndexer];
-
-				# If it's a known terminal token (other than Op) or a literal
-				# character, we need to emit the equivalent string, which'll
-				# later get wrapped into a C string literal, perhaps after
-				# merging with adjacent strings.
-				if ($S ne 'Op'
-					&& (defined $tokens{$S}
-						|| $S =~ /^'.+'$/))
-				{
-					# Apply replace_string substitution if any.
-					$S = $replace_string{$S} if (exists $replace_string{$S});
-					# Automatically strip _P if present.
-					$S =~ s/_P$//;
-					# And get rid of quotes if it's a literal character.
-					$S =~ tr/'//d;
-					# Finally, downcase and push into @fields.
-					push(@fields, lc($S));
-				}
-				else
-				{
-					# Otherwise, push a $N reference to this input token.
-					# (We assume this cannot be confused with anything the
-					# above code would produce.)
-					push(@fields, '$' . (scalar(@fields) + 1));
-				}
+				$line_count++;
 			}
 		}
 	}
@@ -568,13 +503,13 @@ sub include_file
 # by an ecpg.addons entry.
 sub emit_rule_action
 {
-	my ($tag, $fields) = @_;
+	my ($tag) = @_;

 	# See if we have an addons entry; if not, just emit default action
 	my $rec = $addons{$tag};
 	if (!$rec)
 	{
-		emit_default_action($fields, 0);
+		emit_default_action(0);
 		return;
 	}

@@ -585,7 +520,7 @@ sub emit_rule_action
 	if ($rectype eq 'rule')
 	{
 		# Emit default action and then the code block.
-		emit_default_action($fields, 0);
+		emit_default_action(0);
 	}
 	elsif ($rectype eq 'addon')
 	{
@@ -600,7 +535,7 @@ sub emit_rule_action

 	if ($rectype eq 'addon')
 	{
-		emit_default_action($fields, 1);
+		emit_default_action(1);
 	}
 	return;
 }
@@ -626,12 +561,11 @@ sub dump_buffer
 }

 # Emit the default action (usually token concatenation) for the current rule.
-#   Pass: fields array, brace_printed boolean
+#   Pass: brace_printed boolean
 # brace_printed should be true if caller already printed action's open brace.
 sub emit_default_action
 {
-	my ($flds, $brace_printed) = @_;
-	my $len = scalar(@$flds);
+	my ($brace_printed) = @_;

 	if ($stmt_mode == 0)
 	{
@@ -651,91 +585,21 @@ sub emit_default_action
 			);
 		}

-		if ($len == 0)
-		{
-			# Empty rule
-			if (!$brace_printed)
-			{
-				add_to_buffer('rules', ' { ');
-				$brace_printed = 1;
-			}
-			add_to_buffer('rules', ' $$=EMPTY; }');
-		}
-		else
-		{
-			# Go through each field and aggregate consecutive literal tokens
-			# into a single 'mm_strdup' call.
-			my @flds_new;
-			my $str;
-			for (my $z = 0; $z < $len; $z++)
-			{
-				if (substr($flds->[$z], 0, 1) eq '$')
-				{
-					push(@flds_new, $flds->[$z]);
-					next;
-				}
-
-				$str = $flds->[$z];
-
-				while (1)
-				{
-					if ($z >= $len - 1
-						|| substr($flds->[ $z + 1 ], 0, 1) eq '$')
-					{
-						# Can't combine any more literals; push to @flds_new.
-						# This code would need work if any literals contain
-						# backslash or double quote, but right now that never
-						# happens.
-						push(@flds_new, "mm_strdup(\"$str\")");
-						last;
-					}
-					$z++;
-					$str = $str . ' ' . $flds->[$z];
-				}
-			}
-
-			# So - how many fields did we end up with ?
-			$len = scalar(@flds_new);
-			if ($len == 1)
-			{
-				# Single field can be handled by straight assignment
-				if (!$brace_printed)
-				{
-					add_to_buffer('rules', ' { ');
-					$brace_printed = 1;
-				}
-				$str = ' $$ = ' . $flds_new[0] . ';';
-				add_to_buffer('rules', $str);
-			}
-			else
-			{
-				# Need to concatenate the results to form our final string
-				if (!$brace_printed)
-				{
-					add_to_buffer('rules', ' { ');
-					$brace_printed = 1;
-				}
-				$str =
-				  ' $$ = cat_str(' . $len . ',' . join(',', @flds_new) . ');';
-				add_to_buffer('rules', $str);
-			}
-			add_to_buffer('rules', '}') if ($brace_printed);
-		}
+		add_to_buffer('rules', '}') if ($brace_printed);
 	}
 	else
 	{
 		# We're in the "stmt:" rule, where we need to output special actions.
 		# This code assumes that no ecpg.addons entry applies.
-		if ($len)
+		if ($line_count)
 		{
 			# Any regular kind of statement calls output_statement
 			add_to_buffer('rules',
-				' { output_statement($1, 0, ECPGst_normal); }');
+				' { output_statement(@1, 0, ECPGst_normal); }');
 		}
 		else
 		{
 			# The empty production for stmt: do nothing
-			add_to_buffer('rules', ' { $$ = NULL; }');
 		}
 	}
 	return;
@@ -746,8 +610,6 @@ sub emit_default_action
 # entry in %replace_line, then do nothing and return 0.
 sub emit_rule
 {
-	my ($fields) = @_;
-
 	# compute tag to be used as lookup key in %replace_line and %addons
 	my $tag = $non_term_id . $line;
 	$tag =~ tr/ |//d;
@@ -761,7 +623,8 @@ sub emit_rule
 			return 0;
 		}

-		# non-ignore entries replace the line, but we'd better keep any '|'
+		# non-ignore entries replace the line, but we'd better keep any '|';
+		# we don't bother to update $line_count here.
 		if (index($line, '|') != -1)
 		{
 			$line = '| ' . $rep;
@@ -778,7 +641,7 @@ sub emit_rule

 	# Emit $line, then print the appropriate action.
 	add_to_buffer('rules', $line);
-	emit_rule_action($tag, $fields);
+	emit_rule_action($tag);
 	return 1;
 }

--- a/src/interfaces/ecpg/preproc/parser.c
+++ b/src/interfaces/ecpg/preproc/parser.c
@@ -31,6 +31,7 @@ static YYSTYPE lookahead_yylval;	/* yylval for lookahead token */
 static YYLTYPE lookahead_yylloc;	/* yylloc for lookahead token */
 static char *lookahead_yytext;	/* start current token */

+static int	base_yylex_location(void);
 static bool check_uescapechar(unsigned char escape);
 static bool ecpg_isspace(char ch);

@@ -71,7 +72,7 @@ filtered_base_yylex(void)
 		have_lookahead = false;
 	}
 	else
-		cur_token = base_yylex();
+		cur_token = base_yylex_location();

 	/*
 	 * If this token isn't one that requires lookahead, just return it.
@@ -96,7 +97,7 @@ filtered_base_yylex(void)
 	cur_yytext = base_yytext;

 	/* Get next token, saving outputs into lookahead variables */
-	next_token = base_yylex();
+	next_token = base_yylex_location();

 	lookahead_token = next_token;
 	lookahead_yylval = base_yylval;
@@ -184,7 +185,7 @@ filtered_base_yylex(void)
 				cur_yytext = base_yytext;

 				/* Get third token */
-				next_token = base_yylex();
+				next_token = base_yylex_location();

 				if (next_token != SCONST)
 					mmerror(PARSE_ERROR, ET_ERROR, "UESCAPE must be followed by a simple string literal");
@@ -203,6 +204,7 @@ filtered_base_yylex(void)

 				/* Combine 3 tokens into 1 */
 				base_yylval.str = psprintf("%s UESCAPE %s", base_yylval.str, escstr);
+				base_yylloc = mm_strdup(base_yylval.str);

 				/* Clear have_lookahead, thereby consuming all three tokens */
 				have_lookahead = false;
@@ -218,6 +220,56 @@ filtered_base_yylex(void)
 	return cur_token;
 }

+/*
+ * Call base_yylex(), fill in base_yylloc, and return the token unchanged.
+ *
+ * pgc.l does not worry about setting yylloc, and given what we want for
+ * that, trying to set it there would be pretty inconvenient.  What we
+ * want is: if the returned token has type <str>, then duplicate its
+ * string value as yylloc; otherwise, make a downcased copy of yytext.
+ * The downcasing is ASCII-only because all that we care about there
+ * is producing uniformly-cased output of keywords.  (That's mostly
+ * cosmetic, but there are places in ecpglib that expect to receive
+ * downcased keywords, plus it keeps us regression-test-compatible
+ * with the pre-v18 implementation of ecpg.)
+ */
+static int
+base_yylex_location(void)
+{
+	int			token = base_yylex();
+
+	switch (token)
+	{
+			/* List a token here if pgc.l assigns to base_yylval.str for it */
+		case Op:
+		case CSTRING:
+		case CPP_LINE:
+		case CVARIABLE:
+		case BCONST:
+		case SCONST:
+		case USCONST:
+		case XCONST:
+		case FCONST:
+		case IDENT:
+		case UIDENT:
+		case IP:
+			/* Duplicate the <str> value, so yylloc gets its own copy */
+			base_yylloc = mm_strdup(base_yylval.str);
+			break;
+		default:
+			/* Else just use the input text, i.e., a copy of yytext */
+			base_yylloc = mm_strdup(base_yytext);
+			/* Apply an ASCII-only downcasing, in place on that copy */
+			for (unsigned char *ptr = (unsigned char *) base_yylloc; *ptr; ptr++)
+			{
+				if (*ptr >= 'A' && *ptr <= 'Z')
+					*ptr += 'a' - 'A';
+			}
+			break;
+	}
+	return token;
+}
+
 /*
 * check_uescapechar() and ecpg_isspace() should match their equivalents
 * in pgc.l.
--- a/src/interfaces/ecpg/preproc/preproc_extern.h
+++ b/src/interfaces/ecpg/preproc/preproc_extern.h
@@ -15,6 +15,13 @@
 #define STRUCT_DEPTH 128
 #define EMPTY mm_strdup("")

+/*
+ * "Location tracking" support --- see ecpg.header for more comments.
+ */
+typedef char *YYLTYPE;
+
+#define YYLTYPE_IS_DECLARED 1
+
 /* variables */

 extern bool autocommit,
@@ -65,10 +72,10 @@ extern const uint16 SQLScanKeywordTokens[];
 extern const char *get_dtype(enum ECPGdtype);
 extern void lex_init(void);
 extern void output_line_number(void);
-extern void output_statement(char *stmt, int whenever_mode, enum ECPG_statement_type st);
-extern void output_prepare_statement(char *name, char *stmt);
-extern void output_deallocate_prepare_statement(char *name);
-extern void output_simple_statement(char *stmt, int whenever_mode);
+extern void output_statement(const char *stmt, int whenever_mode, enum ECPG_statement_type st);
+extern void output_prepare_statement(const char *name, const char *stmt);
+extern void output_deallocate_prepare_statement(const char *name);
+extern void output_simple_statement(const char *stmt, int whenever_mode);
 extern char *hashline_number(void);
 extern int	base_yyparse(void);
 extern int	base_yylex(void);