Non-decimal integer literals

Add support for hexadecimal, octal, and binary integer literals:

    0x42F
    0o273
    0b100101

per SQL:202x draft.

This adds support in the lexer as well as in the integer type input
functions.

Reviewed-by: John Naylor <john.naylor@enterprisedb.com>
Reviewed-by: Zhihong Yu <zyu@yugabyte.com>
Reviewed-by: David Rowley <dgrowleyml@gmail.com>
Reviewed-by: Dean Rasheed <dean.a.rasheed@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb97d@enterprisedb.com
2025-07-17 06:41:09 +03:00 · 2022-12-14 05:40:38 +01:00
parent 60684dd834
commit 6fcda9aba8
16 changed files with 1022 additions and 112 deletions
--- a/src/backend/parser/parse_node.c
+++ b/src/backend/parser/parse_node.c
@ -385,11 +385,46 @@ make_const(ParseState *pstate, A_Const *aconst)
 			{
 				/* could be an oversize integer as well as a float ... */

+				int			base = 10;
+				char	   *startptr;
+				int			sign;
+				char	   *testvalue;
 				int64		val64;
 				char	   *endptr;

+				startptr = aconst->val.fval.fval;
+				if (startptr[0] == '-')
+				{
+					sign = -1;
+					startptr++;
+				}
+				else
+					sign = +1;
+				if (startptr[0] == '0')	/* radix prefix: 0b, 0o or 0x */
+				{
+					if (startptr[1] == 'b' || startptr[1] == 'B')
+					{
+						base = 2;
+						startptr += 2;
+					}
+					else if (startptr[1] == 'o' || startptr[1] == 'O')
+					{
+						base = 8;
+						startptr += 2;
+					}
+					else if (startptr[1] == 'x' || startptr[1] == 'X')
+					{
+						base = 16;
+						startptr += 2;
+					}
+				}
+
+				if (sign == +1)
+					testvalue = startptr;
+				else
+					testvalue = psprintf("-%s", startptr);
 				errno = 0;
-				val64 = strtoi64(aconst->val.fval.fval, &endptr, 10);
+				val64 = strtoi64(testvalue, &endptr, base);
 				if (errno == 0 && *endptr == '\0')
 				{
 					/*
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@ -124,7 +124,7 @@ static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner);
 static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
 static char *litbufdup(core_yyscan_t yyscanner);
 static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
-static int	process_integer_literal(const char *token, YYSTYPE *lval);
+static int	process_integer_literal(const char *token, YYSTYPE *lval, int base);
 static void addunicode(pg_wchar c, yyscan_t yyscanner);

 #define yyerror(msg)  scanner_yyerror(msg, yyscanner)
@ -385,25 +385,40 @@ operator		{op_chars}+
 * Unary minus is not part of a number here.  Instead we pass it separately to
 * the parser, and there it gets coerced via doNegate().
 *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
 *
 * {realfail} is added to prevent the need for scanner
 * backup when the {real} rule fails to match completely.
 */
-digit			[0-9]
+decdigit		[0-9]
+hexdigit		[0-9A-Fa-f]
+octdigit		[0-7]
+bindigit		[0-1]

-integer			{digit}+
-decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail		{digit}+\.\.
-real			({integer}|{decimal})[Ee][-+]?{digit}+
-realfail		({integer}|{decimal})[Ee][-+]
+decinteger		{decdigit}+
+hexinteger		0[xX]{hexdigit}+
+octinteger		0[oO]{octdigit}+
+bininteger		0[bB]{bindigit}+

-integer_junk	{integer}{ident_start}
-decimal_junk	{decimal}{ident_start}
+hexfail			0[xX]
+octfail			0[oO]
+binfail			0[bB]
+
+numeric			(({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail		{decdigit}+\.\.
+
+real			({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail		({decinteger}|{numeric})[Ee][-+]
+
+decinteger_junk	{decinteger}{ident_start}
+hexinteger_junk	{hexinteger}{ident_start}
+octinteger_junk	{octinteger}{ident_start}
+bininteger_junk	{bininteger}{ident_start}
+numeric_junk	{numeric}{ident_start}
 real_junk		{real}{ident_start}

-param			\${integer}
-param_junk		\${integer}{ident_start}
+param			\${decinteger}
+param_junk		\${decinteger}{ident_start}

 other			.

@ -983,20 +998,44 @@ other			.
 					yyerror("trailing junk after parameter");
 				}

-{integer}		{
+{decinteger}	{
 					SET_YYLLOC();
-					return process_integer_literal(yytext, yylval);
+					return process_integer_literal(yytext, yylval, 10);
 				}
-{decimal}		{
+{hexinteger}	{
+					SET_YYLLOC();
+					return process_integer_literal(yytext, yylval, 16);
+				}
+{octinteger}	{
+					SET_YYLLOC();
+					return process_integer_literal(yytext, yylval, 8);
+				}
+{bininteger}	{
+					SET_YYLLOC();
+					return process_integer_literal(yytext, yylval, 2);
+				}
+{hexfail}		{
+					SET_YYLLOC();
+					yyerror("invalid hexadecimal integer");
+				}
+{octfail}		{
+					SET_YYLLOC();
+					yyerror("invalid octal integer");
+				}
+{binfail}		{
+					SET_YYLLOC();
+					yyerror("invalid binary integer");
+				}
+{numeric}		{
 					SET_YYLLOC();
 					yylval->str = pstrdup(yytext);
 					return FCONST;
 				}
-{decimalfail}	{
+{numericfail}	{
 					/* throw back the .., and treat as integer */
 					yyless(yyleng - 2);
 					SET_YYLLOC();
-					return process_integer_literal(yytext, yylval);
+					return process_integer_literal(yytext, yylval, 10);
 				}
 {real}			{
 					SET_YYLLOC();
@ -1007,11 +1046,23 @@ other			.
 					SET_YYLLOC();
 					yyerror("trailing junk after numeric literal");
 				}
-{integer_junk}	{
+{decinteger_junk}	{
 					SET_YYLLOC();
 					yyerror("trailing junk after numeric literal");
 				}
-{decimal_junk}	{
+{hexinteger_junk}	{
+					SET_YYLLOC();
+					yyerror("trailing junk after numeric literal");
+				}
+{octinteger_junk}	{
+					SET_YYLLOC();
+					yyerror("trailing junk after numeric literal");
+				}
+{bininteger_junk}	{
+					SET_YYLLOC();
+					yyerror("trailing junk after numeric literal");
+				}
+{numeric_junk}	{
 					SET_YYLLOC();
 					yyerror("trailing junk after numeric literal");
 				}
@ -1307,17 +1358,17 @@ litbufdup(core_yyscan_t yyscanner)
 }

 /*
- * Process {integer}.  Note this will also do the right thing with {decimal},
- * ie digits and a decimal point.
+ * Process {decinteger}, {hexinteger}, etc.  Note this will also do the right
+ * thing with {numeric}, ie digits and a decimal point.
 */
 static int
-process_integer_literal(const char *token, YYSTYPE *lval)
+process_integer_literal(const char *token, YYSTYPE *lval, int base)
 {
 	int			val;
 	char	   *endptr;

 	errno = 0;
-	val = strtoint(token, &endptr, 10);
+	val = strtoint(base == 10 ? token : token + 2, &endptr, base);
 	if (*endptr != '\0' || errno == ERANGE)
 	{
 		/* integer too large (or contains decimal pt), treat it as a float */