From 147bbc90f75794e5522dcbadaf2bbe1af3ce574a Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 26 Sep 2024 11:02:31 -0400
Subject: [PATCH] Modernize to_char's Roman-numeral code, fixing overflow
 problems.

int_to_roman() only accepts plain "int" input, which is fine since
we're going to produce '###############' for any value above 3999
anyway.  However, the numeric and int8 variants of to_char() would
throw an error if the given input exceeded the integer range, while
the float-input variants invoked undefined-per-C-standard behavior.
Fix things so that you uniformly get '###############' for out of
range input.

Also add test cases covering this code, plus the equally-untested
EEEE, V, and PL format codes.

Discussion: https://postgr.es/m/2956175.1725831136@sss.pgh.pa.us
---
 doc/src/sgml/func.sgml                |  2 +
 src/backend/utils/adt/formatting.c    | 92 ++++++++++++++++++++-------
 src/test/regress/expected/int8.out    | 50 +++++++++++++++
 src/test/regress/expected/numeric.out | 87 +++++++++++++++++++++++++
 src/test/regress/sql/int8.sql         |  8 +++
 src/test/regress/sql/numeric.sql      | 15 +++++
 6 files changed, 231 insertions(+), 23 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index e39d524b6bd..d6acdd3059e 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -8739,6 +8739,8 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
        <replaceable>n</replaceable> is the number of digits following
        <literal>V</literal>.  <literal>V</literal> with
        <function>to_number</function> divides in a similar manner.
+       The <literal>V</literal> can be thought of as marking the position
+       of an implicit decimal point in the input or output string.
        <function>to_char</function> and <function>to_number</function>
        do not support the use of
        <literal>V</literal> combined with a decimal point
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 68fa89418ff..85a7dd45619 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -5189,6 +5189,11 @@ NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
 }
 
 
+/*
+ * Convert integer to Roman numerals
+ * Result is upper-case and not blank-padded (NUM_processor converts as needed)
+ * If input is out-of-range, produce '###############'
+ */
 static char *
 int_to_roman(int number)
 {
@@ -5201,32 +5206,42 @@ int_to_roman(int number)
 	result = (char *) palloc(16);
 	*result = '\0';
 
+	/*
+	 * This range limit is the same as in Oracle(TM).  The difficulty with
+	 * handling 4000 or more is that we'd need to use more than 3 "M"'s, and
+	 * more than 3 of the same digit isn't considered a valid Roman string.
+	 */
 	if (number > 3999 || number < 1)
 	{
 		fill_str(result, '#', 15);
 		return result;
 	}
+
+	/* Convert to decimal, then examine each digit */
 	len = snprintf(numstr, sizeof(numstr), "%d", number);
+	Assert(len > 0 && len <= 4);
 
 	for (p = numstr; *p != '\0'; p++, --len)
 	{
 		num = *p - ('0' + 1);
 		if (num < 0)
-			continue;
-
-		if (len > 3)
+			continue;			/* ignore zeroes */
+		/* switch on current column position */
+		switch (len)
 		{
-			while (num-- != -1)
-				strcat(result, "M");
-		}
-		else
-		{
-			if (len == 3)
+			case 4:
+				while (num-- >= 0)
+					strcat(result, "M");
+				break;
+			case 3:
 				strcat(result, rm100[num]);
-			else if (len == 2)
+				break;
+			case 2:
 				strcat(result, rm10[num]);
-			else if (len == 1)
+				break;
+			case 1:
 				strcat(result, rm1[num]);
+				break;
 		}
 	}
 	return result;
@@ -6367,7 +6382,6 @@ numeric_to_char(PG_FUNCTION_ARGS)
 	char	   *numstr,
 			   *orgnum,
 			   *p;
-	Numeric		x;
 
 	NUM_TOCHAR_prepare;
 
@@ -6376,12 +6390,15 @@ numeric_to_char(PG_FUNCTION_ARGS)
 	 */
 	if (IS_ROMAN(&Num))
 	{
-		x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
-												NumericGetDatum(value),
-												Int32GetDatum(0)));
-		numstr =
-			int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4,
-														   NumericGetDatum(x))));
+		int32		intvalue;
+		bool		err;
+
+		/* Round and convert to int */
+		intvalue = numeric_int4_opt_error(value, &err);
+		/* On overflow, just use PG_INT32_MAX; int_to_roman will cope */
+		if (err)
+			intvalue = PG_INT32_MAX;
+		numstr = int_to_roman(intvalue);
 	}
 	else if (IS_EEEE(&Num))
 	{
@@ -6421,6 +6438,7 @@ numeric_to_char(PG_FUNCTION_ARGS)
 	{
 		int			numstr_pre_len;
 		Numeric		val = value;
+		Numeric		x;
 
 		if (IS_MULTI(&Num))
 		{
@@ -6589,12 +6607,18 @@ int8_to_char(PG_FUNCTION_ARGS)
 	NUM_TOCHAR_prepare;
 
 	/*
-	 * On DateType depend part (int32)
+	 * On DateType depend part (int64)
 	 */
 	if (IS_ROMAN(&Num))
 	{
-		/* Currently don't support int8 conversion to roman... */
-		numstr = int_to_roman(DatumGetInt32(DirectFunctionCall1(int84, Int64GetDatum(value))));
+		int32		intvalue;
+
+		/* On overflow, just use PG_INT32_MAX; int_to_roman will cope */
+		if (value <= PG_INT32_MAX && value >= PG_INT32_MIN)
+			intvalue = (int32) value;
+		else
+			intvalue = PG_INT32_MAX;
+		numstr = int_to_roman(intvalue);
 	}
 	else if (IS_EEEE(&Num))
 	{
@@ -6695,7 +6719,18 @@ float4_to_char(PG_FUNCTION_ARGS)
 	NUM_TOCHAR_prepare;
 
 	if (IS_ROMAN(&Num))
-		numstr = int_to_roman((int) rint(value));
+	{
+		int32		intvalue;
+
+		/* See notes in ftoi4() */
+		value = rint(value);
+		/* On overflow, just use PG_INT32_MAX; int_to_roman will cope */
+		if (!isnan(value) && FLOAT4_FITS_IN_INT32(value))
+			intvalue = (int32) value;
+		else
+			intvalue = PG_INT32_MAX;
+		numstr = int_to_roman(intvalue);
+	}
 	else if (IS_EEEE(&Num))
 	{
 		if (isnan(value) || isinf(value))
@@ -6797,7 +6832,18 @@ float8_to_char(PG_FUNCTION_ARGS)
 	NUM_TOCHAR_prepare;
 
 	if (IS_ROMAN(&Num))
-		numstr = int_to_roman((int) rint(value));
+	{
+		int32		intvalue;
+
+		/* See notes in dtoi4() */
+		value = rint(value);
+		/* On overflow, just use PG_INT32_MAX; int_to_roman will cope */
+		if (!isnan(value) && FLOAT8_FITS_IN_INT32(value))
+			intvalue = (int32) value;
+		else
+			intvalue = PG_INT32_MAX;
+		numstr = int_to_roman(intvalue);
+	}
 	else if (IS_EEEE(&Num))
 	{
 		if (isnan(value) || isinf(value))
diff --git a/src/test/regress/expected/int8.out b/src/test/regress/expected/int8.out
index fddc09f6305..392f3bf655c 100644
--- a/src/test/regress/expected/int8.out
+++ b/src/test/regress/expected/int8.out
@@ -530,6 +530,16 @@ SELECT to_char(q2, 'MI9999999999999999')     FROM INT8_TBL;
  -4567890123456789
 (5 rows)
 
+SELECT to_char(q2, '9999999999999999PL')     FROM INT8_TBL;
+      to_char       
+--------------------
+               456+
+  4567890123456789+
+               123+
+  4567890123456789+
+ -4567890123456789 
+(5 rows)
+
 SELECT to_char(q2, 'FMS9999999999999999')    FROM INT8_TBL;
       to_char      
 -------------------
@@ -650,6 +660,46 @@ SELECT to_char(q2, '999999SG9999999999')     FROM INT8_TBL;
  456789-0123456789
 (5 rows)
 
+SELECT to_char(q2, 'FMRN')                   FROM INT8_TBL;
+     to_char     
+-----------------
+ CDLVI
+ ###############
+ CXXIII
+ ###############
+ ###############
+(5 rows)
+
+SELECT to_char(1234, '9.99EEEE');
+  to_char  
+-----------
+  1.23e+03
+(1 row)
+
+SELECT to_char(1234::int8, '9.99eeee');
+  to_char  
+-----------
+  1.23e+03
+(1 row)
+
+SELECT to_char(-1234::int8, '9.99eeee');
+  to_char  
+-----------
+ -1.23e+03
+(1 row)
+
+SELECT to_char(1234, '99999V99');
+ to_char  
+----------
+   123400
+(1 row)
+
+SELECT to_char(1234::int8, '99999V99');
+ to_char  
+----------
+   123400
+(1 row)
+
 -- check min/max values and overflow behavior
 select '-9223372036854775808'::int8;
          int8         
diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out
index f30ac236f52..d54282496ab 100644
--- a/src/test/regress/expected/numeric.out
+++ b/src/test/regress/expected/numeric.out
@@ -1991,6 +1991,21 @@ SELECT to_char(val, '9.999EEEE')				FROM num_data;
  -2.493e+07
 (10 rows)
 
+SELECT to_char(val, 'FMRN')					FROM num_data;
+     to_char     
+-----------------
+ ###############
+ ###############
+ ###############
+ IV
+ ###############
+ ###############
+ ###############
+ ###############
+ ###############
+ ###############
+(10 rows)
+
 WITH v(val) AS
   (VALUES('0'::numeric),('-4.2'),('4.2e9'),('1.2e-5'),('inf'),('-inf'),('nan'))
 SELECT val,
@@ -2101,6 +2116,72 @@ SELECT to_char('12345678901'::float8, 'FM9999999999D9999900000000000000000');
  ##########.####
 (1 row)
 
+SELECT to_char('100'::numeric, 'rn');
+     to_char     
+-----------------
+               c
+(1 row)
+
+SELECT to_char('1234'::numeric, 'rn');
+     to_char     
+-----------------
+        mccxxxiv
+(1 row)
+
+SELECT to_char('1235'::float4, 'rn');
+     to_char     
+-----------------
+         mccxxxv
+(1 row)
+
+SELECT to_char('1236'::float8, 'rn');
+     to_char     
+-----------------
+        mccxxxvi
+(1 row)
+
+SELECT to_char('1237'::float8, 'fmrn');
+  to_char  
+-----------
+ mccxxxvii
+(1 row)
+
+SELECT to_char('100e9'::numeric, 'RN');
+     to_char     
+-----------------
+ ###############
+(1 row)
+
+SELECT to_char('100e9'::float4, 'RN');
+     to_char     
+-----------------
+ ###############
+(1 row)
+
+SELECT to_char('100e9'::float8, 'RN');
+     to_char     
+-----------------
+ ###############
+(1 row)
+
+SELECT to_char(1234.56::numeric, '99999V99');
+ to_char  
+----------
+   123456
+(1 row)
+
+SELECT to_char(1234.56::float4, '99999V99');
+ to_char  
+----------
+   123456
+(1 row)
+
+SELECT to_char(1234.56::float8, '99999V99');
+ to_char  
+----------
+   123456
+(1 row)
+
 -- Check parsing of literal text in a format string
 SELECT to_char('100'::numeric, 'foo999');
  to_char 
@@ -2297,6 +2378,12 @@ SELECT to_number('42nd', '99th');
         42
 (1 row)
 
+SELECT to_number('123456', '99999V99');
+        to_number        
+-------------------------
+ 1234.560000000000000000
+(1 row)
+
 RESET lc_numeric;
 --
 -- Input syntax
diff --git a/src/test/regress/sql/int8.sql b/src/test/regress/sql/int8.sql
index fffb28906a1..53359f06ade 100644
--- a/src/test/regress/sql/int8.sql
+++ b/src/test/regress/sql/int8.sql
@@ -110,6 +110,7 @@ SELECT to_char( (q1 * -1), '9999999999999999S'), to_char( (q2 * -1), 'S999999999
 	FROM INT8_TBL;
 
 SELECT to_char(q2, 'MI9999999999999999')     FROM INT8_TBL;
+SELECT to_char(q2, '9999999999999999PL')     FROM INT8_TBL;
 SELECT to_char(q2, 'FMS9999999999999999')    FROM INT8_TBL;
 SELECT to_char(q2, 'FM9999999999999999THPR') FROM INT8_TBL;
 SELECT to_char(q2, 'SG9999999999999999th')   FROM INT8_TBL;
@@ -122,6 +123,13 @@ SELECT to_char(q2, 'FM9999999999999999.999') FROM INT8_TBL;
 SELECT to_char(q2, 'S 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 . 9 9 9') FROM INT8_TBL;
 SELECT to_char(q2, E'99999 "text" 9999 "9999" 999 "\\"text between quote marks\\"" 9999') FROM INT8_TBL;
 SELECT to_char(q2, '999999SG9999999999')     FROM INT8_TBL;
+SELECT to_char(q2, 'FMRN')                   FROM INT8_TBL;
+
+SELECT to_char(1234, '9.99EEEE');
+SELECT to_char(1234::int8, '9.99eeee');
+SELECT to_char(-1234::int8, '9.99eeee');
+SELECT to_char(1234, '99999V99');
+SELECT to_char(1234::int8, '99999V99');
 
 -- check min/max values and overflow behavior
 
diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql
index c86395209ab..b508cba71dd 100644
--- a/src/test/regress/sql/numeric.sql
+++ b/src/test/regress/sql/numeric.sql
@@ -996,6 +996,7 @@ SELECT to_char(val, E'99999 "text" 9999 "9999" 999 "\\"text between quote marks\
 SELECT to_char(val, '999999SG9999999999')			FROM num_data;
 SELECT to_char(val, 'FM9999999999999999.999999999999999')	FROM num_data;
 SELECT to_char(val, '9.999EEEE')				FROM num_data;
+SELECT to_char(val, 'FMRN')					FROM num_data;
 
 WITH v(val) AS
   (VALUES('0'::numeric),('-4.2'),('4.2e9'),('1.2e-5'),('inf'),('-inf'),('nan'))
@@ -1033,6 +1034,19 @@ SELECT to_char('100'::numeric, 'FM999.');
 SELECT to_char('100'::numeric, 'FM999');
 SELECT to_char('12345678901'::float8, 'FM9999999999D9999900000000000000000');
 
+SELECT to_char('100'::numeric, 'rn');
+SELECT to_char('1234'::numeric, 'rn');
+SELECT to_char('1235'::float4, 'rn');
+SELECT to_char('1236'::float8, 'rn');
+SELECT to_char('1237'::float8, 'fmrn');
+SELECT to_char('100e9'::numeric, 'RN');
+SELECT to_char('100e9'::float4, 'RN');
+SELECT to_char('100e9'::float8, 'RN');
+
+SELECT to_char(1234.56::numeric, '99999V99');
+SELECT to_char(1234.56::float4, '99999V99');
+SELECT to_char(1234.56::float8, '99999V99');
+
 -- Check parsing of literal text in a format string
 SELECT to_char('100'::numeric, 'foo999');
 SELECT to_char('100'::numeric, 'f\oo999');
@@ -1070,6 +1084,7 @@ SELECT to_number('$1,234.56','L99,999.99');
 SELECT to_number('1234.56','L99,999.99');
 SELECT to_number('1,234.56','L99,999.99');
 SELECT to_number('42nd', '99th');
+SELECT to_number('123456', '99999V99');
 RESET lc_numeric;
 
 --