From d1b307eef2818fe24760cc2c168d7d65d59775a8 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Sun, 27 Feb 2022 11:12:30 +0000 Subject: [PATCH] Optimise numeric division for one and two base-NBASE digit divisors. Formerly div_var() had "fast path" short division code that was significantly faster when the divisor was just one base-NBASE digit, but otherwise used long division. This commit adds a new function div_var_int() that divides by an arbitrary 32-bit integer, using the fast short division algorithm, and updates both div_var() and div_var_fast() to use it for one and two digit divisors. In the case of div_var(), this is slightly faster in the one-digit case, because it avoids some digit array copying, and is much faster in the two-digit case where it replaces long division. For div_var_fast(), it is much faster in both cases because the main div_var_fast() algorithm is optimised for larger inputs. Additionally, optimise exp() and ln() by using div_var_int(), allowing a NumericVar to be replaced by an int in a couple of places, most notably in the Taylor series code. This produces a significant speedup of exp(), ln() and the numeric_big regression test. Dean Rasheed, reviewed by Tom Lane. Discussion: https://postgr.es/m/CAEZATCVwsBi-ND-t82Cuuh1=8ee6jdOpzsmGN+CUZB6yjLg9jw@mail.gmail.com --- src/backend/utils/adt/numeric.c | 223 ++++++++++++++++++++++++++------ 1 file changed, 180 insertions(+), 43 deletions(-) diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 47475bf695b..975d7dcf476 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -551,6 +551,8 @@ static void div_var(const NumericVar *var1, const NumericVar *var2, int rscale, bool round); static void div_var_fast(const NumericVar *var1, const NumericVar *var2, NumericVar *result, int rscale, bool round); +static void div_var_int(const NumericVar *var, int ival, int ival_weight, + NumericVar *result, int rscale, bool round); static int select_div_scale(const NumericVar *var1, const NumericVar *var2); static void mod_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result); @@ -8451,8 +8453,33 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result, errmsg("division by zero"))); /* - * Now result zero check + * If the divisor has just one or two digits, delegate to div_var_int(), + * which uses fast short division. */ + if (var2ndigits <= 2) + { + int idivisor; + int idivisor_weight; + + idivisor = var2->digits[0]; + idivisor_weight = var2->weight; + if (var2ndigits == 2) + { + idivisor = idivisor * NBASE + var2->digits[1]; + idivisor_weight--; + } + if (var2->sign == NUMERIC_NEG) + idivisor = -idivisor; + + div_var_int(var1, idivisor, idivisor_weight, result, rscale, round); + return; + } + + /* + * Otherwise, perform full long division. + */ + + /* Result zero check */ if (var1ndigits == 0) { zero_var(result); @@ -8510,23 +8537,6 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result, alloc_var(result, res_ndigits); res_digits = result->digits; - if (var2ndigits == 1) - { - /* - * If there's only a single divisor digit, we can use a fast path (cf. - * Knuth section 4.3.1 exercise 16). - */ - divisor1 = divisor[1]; - carry = 0; - for (i = 0; i < res_ndigits; i++) - { - carry = carry * NBASE + dividend[i + 1]; - res_digits[i] = carry / divisor1; - carry = carry % divisor1; - } - } - else - { /* * The full multiple-place algorithm is taken from Knuth volume 2, * Algorithm 4.3.1D. @@ -8659,7 +8669,6 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result, /* And we're done with this quotient digit */ res_digits[j] = qhat; } - } pfree(dividend); @@ -8735,8 +8744,33 @@ div_var_fast(const NumericVar *var1, const NumericVar *var2, errmsg("division by zero"))); /* - * Now result zero check + * If the divisor has just one or two digits, delegate to div_var_int(), + * which uses fast short division. */ + if (var2ndigits <= 2) + { + int idivisor; + int idivisor_weight; + + idivisor = var2->digits[0]; + idivisor_weight = var2->weight; + if (var2ndigits == 2) + { + idivisor = idivisor * NBASE + var2->digits[1]; + idivisor_weight--; + } + if (var2->sign == NUMERIC_NEG) + idivisor = -idivisor; + + div_var_int(var1, idivisor, idivisor_weight, result, rscale, round); + return; + } + + /* + * Otherwise, perform full long division. + */ + + /* Result zero check */ if (var1ndigits == 0) { zero_var(result); @@ -9008,6 +9042,118 @@ div_var_fast(const NumericVar *var1, const NumericVar *var2, } +/* + * div_var_int() - + * + * Divide a numeric variable by a 32-bit integer with the specified weight. + * The quotient var / (ival * NBASE^ival_weight) is stored in result. + */ +static void +div_var_int(const NumericVar *var, int ival, int ival_weight, + NumericVar *result, int rscale, bool round) +{ + NumericDigit *var_digits = var->digits; + int var_ndigits = var->ndigits; + int res_sign; + int res_weight; + int res_ndigits; + NumericDigit *res_buf; + NumericDigit *res_digits; + uint32 divisor; + int i; + + /* Guard against division by zero */ + if (ival == 0) + ereport(ERROR, + errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero")); + + /* Result zero check */ + if (var_ndigits == 0) + { + zero_var(result); + result->dscale = rscale; + return; + } + + /* + * Determine the result sign, weight and number of digits to calculate. + * The weight figured here is correct if the emitted quotient has no + * leading zero digits; otherwise strip_var() will fix things up. + */ + if (var->sign == NUMERIC_POS) + res_sign = ival > 0 ? NUMERIC_POS : NUMERIC_NEG; + else + res_sign = ival > 0 ? NUMERIC_NEG : NUMERIC_POS; + res_weight = var->weight - ival_weight; + /* The number of accurate result digits we need to produce: */ + res_ndigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS; + /* ... but always at least 1 */ + res_ndigits = Max(res_ndigits, 1); + /* If rounding needed, figure one more digit to ensure correct result */ + if (round) + res_ndigits++; + + res_buf = digitbuf_alloc(res_ndigits + 1); + res_buf[0] = 0; /* spare digit for later rounding */ + res_digits = res_buf + 1; + + /* + * Now compute the quotient digits. This is the short division algorithm + * described in Knuth volume 2, section 4.3.1 exercise 16, except that we + * allow the divisor to exceed the internal base. + * + * In this algorithm, the carry from one digit to the next is at most + * divisor - 1. Therefore, while processing the next digit, carry may + * become as large as divisor * NBASE - 1, and so it requires a 64-bit + * integer if this exceeds UINT_MAX. + */ + divisor = Abs(ival); + + if (divisor <= UINT_MAX / NBASE) + { + /* carry cannot overflow 32 bits */ + uint32 carry = 0; + + for (i = 0; i < res_ndigits; i++) + { + carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0); + res_digits[i] = (NumericDigit) (carry / divisor); + carry = carry % divisor; + } + } + else + { + /* carry may exceed 32 bits */ + uint64 carry = 0; + + for (i = 0; i < res_ndigits; i++) + { + carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0); + res_digits[i] = (NumericDigit) (carry / divisor); + carry = carry % divisor; + } + } + + /* Store the quotient in result */ + digitbuf_free(result->buf); + result->ndigits = res_ndigits; + result->buf = res_buf; + result->digits = res_digits; + result->weight = res_weight; + result->sign = res_sign; + + /* Round or truncate to target rscale (and set result->dscale) */ + if (round) + round_var(result, rscale); + else + trunc_var(result, rscale); + + /* Strip leading/trailing zeroes */ + strip_var(result); +} + + /* * Default scale selection for division * @@ -9783,7 +9929,7 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale) { NumericVar x; NumericVar elem; - NumericVar ni; + int ni; double val; int dweight; int ndiv2; @@ -9792,7 +9938,6 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale) init_var(&x); init_var(&elem); - init_var(&ni); set_var_from_var(arg, &x); @@ -9820,15 +9965,13 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale) /* * Reduce x to the range -0.01 <= x <= 0.01 (approximately) by dividing by - * 2^n, to improve the convergence rate of the Taylor series. + * 2^ndiv2, to improve the convergence rate of the Taylor series. + * + * Note that the overflow check above ensures that Abs(x) < 6000, which + * means that ndiv2 <= 20 here. */ if (Abs(val) > 0.01) { - NumericVar tmp; - - init_var(&tmp); - set_var_from_var(&const_two, &tmp); - ndiv2 = 1; val /= 2; @@ -9836,13 +9979,10 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale) { ndiv2++; val /= 2; - add_var(&tmp, &tmp, &tmp); } local_rscale = x.dscale + ndiv2; - div_var_fast(&x, &tmp, &x, local_rscale, true); - - free_var(&tmp); + div_var_int(&x, 1 << ndiv2, 0, &x, local_rscale, true); } else ndiv2 = 0; @@ -9870,16 +10010,16 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale) add_var(&const_one, &x, result); mul_var(&x, &x, &elem, local_rscale); - set_var_from_var(&const_two, &ni); - div_var_fast(&elem, &ni, &elem, local_rscale, true); + ni = 2; + div_var_int(&elem, ni, 0, &elem, local_rscale, true); while (elem.ndigits != 0) { add_var(result, &elem, result); mul_var(&elem, &x, &elem, local_rscale); - add_var(&ni, &const_one, &ni); - div_var_fast(&elem, &ni, &elem, local_rscale, true); + ni++; + div_var_int(&elem, ni, 0, &elem, local_rscale, true); } /* @@ -9899,7 +10039,6 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale) free_var(&x); free_var(&elem); - free_var(&ni); } @@ -9993,7 +10132,7 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale) { NumericVar x; NumericVar xx; - NumericVar ni; + int ni; NumericVar elem; NumericVar fact; int nsqrt; @@ -10012,7 +10151,6 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale) init_var(&x); init_var(&xx); - init_var(&ni); init_var(&elem); init_var(&fact); @@ -10073,13 +10211,13 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale) set_var_from_var(result, &xx); mul_var(result, result, &x, local_rscale); - set_var_from_var(&const_one, &ni); + ni = 1; for (;;) { - add_var(&ni, &const_two, &ni); + ni += 2; mul_var(&xx, &x, &xx, local_rscale); - div_var_fast(&xx, &ni, &elem, local_rscale, true); + div_var_int(&xx, ni, 0, &elem, local_rscale, true); if (elem.ndigits == 0) break; @@ -10095,7 +10233,6 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale) free_var(&x); free_var(&xx); - free_var(&ni); free_var(&elem); free_var(&fact); }