From 147bbc90f75794e5522dcbadaf2bbe1af3ce574a Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 26 Sep 2024 11:02:31 -0400 Subject: [PATCH] Modernize to_char's Roman-numeral code, fixing overflow problems. int_to_roman() only accepts plain "int" input, which is fine since we're going to produce '###############' for any value above 3999 anyway. However, the numeric and int8 variants of to_char() would throw an error if the given input exceeded the integer range, while the float-input variants invoked undefined-per-C-standard behavior. Fix things so that you uniformly get '###############' for out of range input. Also add test cases covering this code, plus the equally-untested EEEE, V, and PL format codes. Discussion: https://postgr.es/m/2956175.1725831136@sss.pgh.pa.us --- doc/src/sgml/func.sgml | 2 + src/backend/utils/adt/formatting.c | 92 ++++++++++++++++++++------- src/test/regress/expected/int8.out | 50 +++++++++++++++ src/test/regress/expected/numeric.out | 87 +++++++++++++++++++++++++ src/test/regress/sql/int8.sql | 8 +++ src/test/regress/sql/numeric.sql | 15 +++++ 6 files changed, 231 insertions(+), 23 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index e39d524b6bd..d6acdd3059e 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -8739,6 +8739,8 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); n is the number of digits following V. V with to_number divides in a similar manner. + The V can be thought of as marking the position + of an implicit decimal point in the input or output string. to_char and to_number do not support the use of V combined with a decimal point diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 68fa89418ff..85a7dd45619 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -5189,6 +5189,11 @@ NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree) } +/* + * Convert integer to Roman numerals + * Result is upper-case and not blank-padded (NUM_processor converts as needed) + * If input is out-of-range, produce '###############' + */ static char * int_to_roman(int number) { @@ -5201,32 +5206,42 @@ int_to_roman(int number) result = (char *) palloc(16); *result = '\0'; + /* + * This range limit is the same as in Oracle(TM). The difficulty with + * handling 4000 or more is that we'd need to use more than 3 "M"'s, and + * more than 3 of the same digit isn't considered a valid Roman string. + */ if (number > 3999 || number < 1) { fill_str(result, '#', 15); return result; } + + /* Convert to decimal, then examine each digit */ len = snprintf(numstr, sizeof(numstr), "%d", number); + Assert(len > 0 && len <= 4); for (p = numstr; *p != '\0'; p++, --len) { num = *p - ('0' + 1); if (num < 0) - continue; - - if (len > 3) + continue; /* ignore zeroes */ + /* switch on current column position */ + switch (len) { - while (num-- != -1) - strcat(result, "M"); - } - else - { - if (len == 3) + case 4: + while (num-- >= 0) + strcat(result, "M"); + break; + case 3: strcat(result, rm100[num]); - else if (len == 2) + break; + case 2: strcat(result, rm10[num]); - else if (len == 1) + break; + case 1: strcat(result, rm1[num]); + break; } } return result; @@ -6367,7 +6382,6 @@ numeric_to_char(PG_FUNCTION_ARGS) char *numstr, *orgnum, *p; - Numeric x; NUM_TOCHAR_prepare; @@ -6376,12 +6390,15 @@ numeric_to_char(PG_FUNCTION_ARGS) */ if (IS_ROMAN(&Num)) { - x = DatumGetNumeric(DirectFunctionCall2(numeric_round, - NumericGetDatum(value), - Int32GetDatum(0))); - numstr = - int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4, - NumericGetDatum(x)))); + int32 intvalue; + bool err; + + /* Round and convert to int */ + intvalue = numeric_int4_opt_error(value, &err); + /* On overflow, just use PG_INT32_MAX; int_to_roman will cope */ + if (err) + intvalue = PG_INT32_MAX; + numstr = int_to_roman(intvalue); } else if (IS_EEEE(&Num)) { @@ -6421,6 +6438,7 @@ numeric_to_char(PG_FUNCTION_ARGS) { int numstr_pre_len; Numeric val = value; + Numeric x; if (IS_MULTI(&Num)) { @@ -6589,12 +6607,18 @@ int8_to_char(PG_FUNCTION_ARGS) NUM_TOCHAR_prepare; /* - * On DateType depend part (int32) + * On DateType depend part (int64) */ if (IS_ROMAN(&Num)) { - /* Currently don't support int8 conversion to roman... */ - numstr = int_to_roman(DatumGetInt32(DirectFunctionCall1(int84, Int64GetDatum(value)))); + int32 intvalue; + + /* On overflow, just use PG_INT32_MAX; int_to_roman will cope */ + if (value <= PG_INT32_MAX && value >= PG_INT32_MIN) + intvalue = (int32) value; + else + intvalue = PG_INT32_MAX; + numstr = int_to_roman(intvalue); } else if (IS_EEEE(&Num)) { @@ -6695,7 +6719,18 @@ float4_to_char(PG_FUNCTION_ARGS) NUM_TOCHAR_prepare; if (IS_ROMAN(&Num)) - numstr = int_to_roman((int) rint(value)); + { + int32 intvalue; + + /* See notes in ftoi4() */ + value = rint(value); + /* On overflow, just use PG_INT32_MAX; int_to_roman will cope */ + if (!isnan(value) && FLOAT4_FITS_IN_INT32(value)) + intvalue = (int32) value; + else + intvalue = PG_INT32_MAX; + numstr = int_to_roman(intvalue); + } else if (IS_EEEE(&Num)) { if (isnan(value) || isinf(value)) @@ -6797,7 +6832,18 @@ float8_to_char(PG_FUNCTION_ARGS) NUM_TOCHAR_prepare; if (IS_ROMAN(&Num)) - numstr = int_to_roman((int) rint(value)); + { + int32 intvalue; + + /* See notes in dtoi4() */ + value = rint(value); + /* On overflow, just use PG_INT32_MAX; int_to_roman will cope */ + if (!isnan(value) && FLOAT8_FITS_IN_INT32(value)) + intvalue = (int32) value; + else + intvalue = PG_INT32_MAX; + numstr = int_to_roman(intvalue); + } else if (IS_EEEE(&Num)) { if (isnan(value) || isinf(value)) diff --git a/src/test/regress/expected/int8.out b/src/test/regress/expected/int8.out index fddc09f6305..392f3bf655c 100644 --- a/src/test/regress/expected/int8.out +++ b/src/test/regress/expected/int8.out @@ -530,6 +530,16 @@ SELECT to_char(q2, 'MI9999999999999999') FROM INT8_TBL; -4567890123456789 (5 rows) +SELECT to_char(q2, '9999999999999999PL') FROM INT8_TBL; + to_char +-------------------- + 456+ + 4567890123456789+ + 123+ + 4567890123456789+ + -4567890123456789 +(5 rows) + SELECT to_char(q2, 'FMS9999999999999999') FROM INT8_TBL; to_char ------------------- @@ -650,6 +660,46 @@ SELECT to_char(q2, '999999SG9999999999') FROM INT8_TBL; 456789-0123456789 (5 rows) +SELECT to_char(q2, 'FMRN') FROM INT8_TBL; + to_char +----------------- + CDLVI + ############### + CXXIII + ############### + ############### +(5 rows) + +SELECT to_char(1234, '9.99EEEE'); + to_char +----------- + 1.23e+03 +(1 row) + +SELECT to_char(1234::int8, '9.99eeee'); + to_char +----------- + 1.23e+03 +(1 row) + +SELECT to_char(-1234::int8, '9.99eeee'); + to_char +----------- + -1.23e+03 +(1 row) + +SELECT to_char(1234, '99999V99'); + to_char +---------- + 123400 +(1 row) + +SELECT to_char(1234::int8, '99999V99'); + to_char +---------- + 123400 +(1 row) + -- check min/max values and overflow behavior select '-9223372036854775808'::int8; int8 diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out index f30ac236f52..d54282496ab 100644 --- a/src/test/regress/expected/numeric.out +++ b/src/test/regress/expected/numeric.out @@ -1991,6 +1991,21 @@ SELECT to_char(val, '9.999EEEE') FROM num_data; -2.493e+07 (10 rows) +SELECT to_char(val, 'FMRN') FROM num_data; + to_char +----------------- + ############### + ############### + ############### + IV + ############### + ############### + ############### + ############### + ############### + ############### +(10 rows) + WITH v(val) AS (VALUES('0'::numeric),('-4.2'),('4.2e9'),('1.2e-5'),('inf'),('-inf'),('nan')) SELECT val, @@ -2101,6 +2116,72 @@ SELECT to_char('12345678901'::float8, 'FM9999999999D9999900000000000000000'); ##########.#### (1 row) +SELECT to_char('100'::numeric, 'rn'); + to_char +----------------- + c +(1 row) + +SELECT to_char('1234'::numeric, 'rn'); + to_char +----------------- + mccxxxiv +(1 row) + +SELECT to_char('1235'::float4, 'rn'); + to_char +----------------- + mccxxxv +(1 row) + +SELECT to_char('1236'::float8, 'rn'); + to_char +----------------- + mccxxxvi +(1 row) + +SELECT to_char('1237'::float8, 'fmrn'); + to_char +----------- + mccxxxvii +(1 row) + +SELECT to_char('100e9'::numeric, 'RN'); + to_char +----------------- + ############### +(1 row) + +SELECT to_char('100e9'::float4, 'RN'); + to_char +----------------- + ############### +(1 row) + +SELECT to_char('100e9'::float8, 'RN'); + to_char +----------------- + ############### +(1 row) + +SELECT to_char(1234.56::numeric, '99999V99'); + to_char +---------- + 123456 +(1 row) + +SELECT to_char(1234.56::float4, '99999V99'); + to_char +---------- + 123456 +(1 row) + +SELECT to_char(1234.56::float8, '99999V99'); + to_char +---------- + 123456 +(1 row) + -- Check parsing of literal text in a format string SELECT to_char('100'::numeric, 'foo999'); to_char @@ -2297,6 +2378,12 @@ SELECT to_number('42nd', '99th'); 42 (1 row) +SELECT to_number('123456', '99999V99'); + to_number +------------------------- + 1234.560000000000000000 +(1 row) + RESET lc_numeric; -- -- Input syntax diff --git a/src/test/regress/sql/int8.sql b/src/test/regress/sql/int8.sql index fffb28906a1..53359f06ade 100644 --- a/src/test/regress/sql/int8.sql +++ b/src/test/regress/sql/int8.sql @@ -110,6 +110,7 @@ SELECT to_char( (q1 * -1), '9999999999999999S'), to_char( (q2 * -1), 'S999999999 FROM INT8_TBL; SELECT to_char(q2, 'MI9999999999999999') FROM INT8_TBL; +SELECT to_char(q2, '9999999999999999PL') FROM INT8_TBL; SELECT to_char(q2, 'FMS9999999999999999') FROM INT8_TBL; SELECT to_char(q2, 'FM9999999999999999THPR') FROM INT8_TBL; SELECT to_char(q2, 'SG9999999999999999th') FROM INT8_TBL; @@ -122,6 +123,13 @@ SELECT to_char(q2, 'FM9999999999999999.999') FROM INT8_TBL; SELECT to_char(q2, 'S 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 . 9 9 9') FROM INT8_TBL; SELECT to_char(q2, E'99999 "text" 9999 "9999" 999 "\\"text between quote marks\\"" 9999') FROM INT8_TBL; SELECT to_char(q2, '999999SG9999999999') FROM INT8_TBL; +SELECT to_char(q2, 'FMRN') FROM INT8_TBL; + +SELECT to_char(1234, '9.99EEEE'); +SELECT to_char(1234::int8, '9.99eeee'); +SELECT to_char(-1234::int8, '9.99eeee'); +SELECT to_char(1234, '99999V99'); +SELECT to_char(1234::int8, '99999V99'); -- check min/max values and overflow behavior diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql index c86395209ab..b508cba71dd 100644 --- a/src/test/regress/sql/numeric.sql +++ b/src/test/regress/sql/numeric.sql @@ -996,6 +996,7 @@ SELECT to_char(val, E'99999 "text" 9999 "9999" 999 "\\"text between quote marks\ SELECT to_char(val, '999999SG9999999999') FROM num_data; SELECT to_char(val, 'FM9999999999999999.999999999999999') FROM num_data; SELECT to_char(val, '9.999EEEE') FROM num_data; +SELECT to_char(val, 'FMRN') FROM num_data; WITH v(val) AS (VALUES('0'::numeric),('-4.2'),('4.2e9'),('1.2e-5'),('inf'),('-inf'),('nan')) @@ -1033,6 +1034,19 @@ SELECT to_char('100'::numeric, 'FM999.'); SELECT to_char('100'::numeric, 'FM999'); SELECT to_char('12345678901'::float8, 'FM9999999999D9999900000000000000000'); +SELECT to_char('100'::numeric, 'rn'); +SELECT to_char('1234'::numeric, 'rn'); +SELECT to_char('1235'::float4, 'rn'); +SELECT to_char('1236'::float8, 'rn'); +SELECT to_char('1237'::float8, 'fmrn'); +SELECT to_char('100e9'::numeric, 'RN'); +SELECT to_char('100e9'::float4, 'RN'); +SELECT to_char('100e9'::float8, 'RN'); + +SELECT to_char(1234.56::numeric, '99999V99'); +SELECT to_char(1234.56::float4, '99999V99'); +SELECT to_char(1234.56::float8, '99999V99'); + -- Check parsing of literal text in a format string SELECT to_char('100'::numeric, 'foo999'); SELECT to_char('100'::numeric, 'f\oo999'); @@ -1070,6 +1084,7 @@ SELECT to_number('$1,234.56','L99,999.99'); SELECT to_number('1234.56','L99,999.99'); SELECT to_number('1,234.56','L99,999.99'); SELECT to_number('42nd', '99th'); +SELECT to_number('123456', '99999V99'); RESET lc_numeric; --