Simplify json lexing state

Instead of updating the length as we go, use a const pointer to end of the input, which we know already at the start. This simplifies the coding and may improve performance slightly, but the real motivation for doing this is to make further changes in this area easier to reason about. Discussion: https://www.postgresql.org/message-id/CAFBsxsGhaR2KQ5eisaK%3D6Vm60t%3DaxhD8Ckj1qFoCH1pktZi%2B2w%40mail.gmail.com
2025-07-11 10:01:57 +03:00 · 2022-07-08 14:53:20 +07:00
parent 3140f08985
commit 3de359f18f
1 changed files with 8 additions and 15 deletions
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@ -519,26 +519,23 @@ JsonParseErrorType
 json_lex(JsonLexContext *lex)
 {
 	char	   *s;
-	int			len;
+	char	   *const end = lex->input + lex->input_length;
 	JsonParseErrorType result;

 	/* Skip leading whitespace. */
 	s = lex->token_terminator;
-	len = s - lex->input;
-	while (len < lex->input_length &&
-		   (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
+	while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
 	{
 		if (*s++ == '\n')
 		{
 			++lex->line_number;
 			lex->line_start = s;
 		}
-		len++;
 	}
 	lex->token_start = s;

 	/* Determine token type. */
-	if (len >= lex->input_length)
+	if (s >= end)
 	{
 		lex->token_start = NULL;
 		lex->prev_token_terminator = lex->token_terminator;
@ -623,7 +620,7 @@ json_lex(JsonLexContext *lex)
 					 * the whole word as an unexpected token, rather than just
 					 * some unintuitive prefix thereof.
 					 */
-					for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++)
+					for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
 						 /* skip */ ;

 					/*
@ -672,7 +669,7 @@ static inline JsonParseErrorType
 json_lex_string(JsonLexContext *lex)
 {
 	char	   *s;
-	int			len;
+	char	   *const end = lex->input + lex->input_length;
 	int			hi_surrogate = -1;

 	if (lex->strval != NULL)
@ -680,13 +677,11 @@ json_lex_string(JsonLexContext *lex)

 	Assert(lex->input_length > 0);
 	s = lex->token_start;
-	len = lex->token_start - lex->input;
 	for (;;)
 	{
 		s++;
-		len++;
 		/* Premature end of the string. */
-		if (len >= lex->input_length)
+		if (s >= end)
 		{
 			lex->token_terminator = s;
 			return JSON_INVALID_TOKEN;
@ -704,8 +699,7 @@ json_lex_string(JsonLexContext *lex)
 		{
 			/* OK, we have an escape character. */
 			s++;
-			len++;
-			if (len >= lex->input_length)
+			if (s >= end)
 			{
 				lex->token_terminator = s;
 				return JSON_INVALID_TOKEN;
@ -718,8 +712,7 @@ json_lex_string(JsonLexContext *lex)
 				for (i = 1; i <= 4; i++)
 				{
 					s++;
-					len++;
-					if (len >= lex->input_length)
+					if (s >= end)
 					{
 						lex->token_terminator = s;
 						return JSON_INVALID_TOKEN;