merged partial_multiply with regular_multiply function.

git-svn-id: svn://svn.code.sf.net/p/axtls/code/trunk@182 9a5d90b5-6617-0410-8a86-bb477d3ed2e3
2025-07-29 05:21:37 +03:00 · 2011-01-02 08:30:53 +00:00
parent 0d2e75b9c7
commit 8c18da4f1e
6 changed files with 73 additions and 118 deletions
--- a/crypto/bigint.c
+++ b/crypto/bigint.c
@ -801,11 +801,16 @@ void bi_free_mod(BI_CTX *ctx, int mod_offset)
 /** 
 * Perform a standard multiplication between two bigints.
 *
 * Barrett reduction has no need for some parts of the product, so ignore bits
 * of the multiply. This routine gives Barrett its big performance
 * improvements over Classical/Montgomery reduction methods. 
 */
-static bigint *regular_multiply(BI_CTX *ctx, bigint *bia, bigint *bib)
+static bigint *regular_multiply(BI_CTX *ctx, bigint *bia, bigint *bib, 
        int inner_partial, int outer_partial)
 {
-    int i, j, i_plus_j;
+    int i = 0, j;
-    int n = bia->size; 
+    int n = bia->size;
    int t = bib->size;
    bigint *biR = alloc(ctx, n + t);
    comp *sr = biR->comps;
@ -817,23 +822,33 @@ static bigint *regular_multiply(BI_CTX *ctx, bigint *bia, bigint *bib)
    /* clear things to start with */
    memset(biR->comps, 0, ((n+t)*COMP_BYTE_SIZE));
    i = 0;
    do 
    {
        comp carry = 0;
        comp b = *sb++;
-        i_plus_j = i;
+        int r_index = i;
        j = 0;
        if (outer_partial)
        {
            r_index = outer_partial-1;
            j = outer_partial-i-1;
        }
        do
        {
-            long_comp tmp = sr[i_plus_j] + (long_comp)sa[j]*b + carry;
+            if (inner_partial && r_index >= inner_partial) 
-            sr[i_plus_j++] = (comp)tmp;              /* downsize */
+            {
-            carry = (comp)(tmp >> COMP_BIT_SIZE);
+                break;
            }
            long_comp tmp = sr[r_index] + ((long_comp)sa[j])*b + carry;
            sr[r_index++] = (comp)tmp;              /* downsize */
            carry = tmp >> COMP_BIT_SIZE;
        } while (++j < n);
-        sr[i_plus_j] = carry;
+        sr[r_index] = carry;
    } while (++i < t);
    bi_free(ctx, bia);
@ -913,12 +928,12 @@ bigint *bi_multiply(BI_CTX *ctx, bigint *bia, bigint *bib)
 #ifdef CONFIG_BIGINT_KARATSUBA
    if (min(bia->size, bib->size) < MUL_KARATSUBA_THRESH)
    {
-        return regular_multiply(ctx, bia, bib);
+        return regular_multiply(ctx, bia, bib, 0, 0);
    }
    return karatsuba(ctx, bia, bib, 0);
 #else
-    return regular_multiply(ctx, bia, bib);
+    return regular_multiply(ctx, bia, bib, 0, 0);
 #endif
 }
@ -941,7 +956,7 @@ static bigint *regular_square(BI_CTX *ctx, bigint *bi)
        long_comp tmp = w[2*i] + (long_comp)x[i]*x[i];
        uint8_t c = 0;
        w[2*i] = (comp)tmp;
-        carry = (comp)(tmp >> COMP_BIT_SIZE);
+        carry = tmp >> COMP_BIT_SIZE;
        for (j = i+1; j < t; j++)
        {
@ -1242,81 +1257,6 @@ static bigint *comp_mod(bigint *bi, int mod)
    return bi;
 }
 /*
 * Multiply two bigints while skipping part of the product.
 *
 * Barrett reduction has no need for some parts of the product, so ignore bits
 * of the multiply. This routine gives Barrett its big performance
 * improvements over Classical/Montgomery reduction methods. 
 *
 * ctx            Context handed to alloc() and bi_free().
 * bia, bib       The operands; both are passed to bi_free() on every return
 *                path.
 * inner_partial  When non-zero, a row stops accumulating as soon as the result
 *                index reaches inner_partial.
 * outer_partial  When non-zero, each row whose starting index is below
 *                outer_partial skips ahead and starts at result index
 *                outer_partial instead.
 *
 * Returns trim(biR), where biR is allocated with n + t components. When
 * inner_partial is 0 and either operand has fewer than outer_partial
 * components, returns a one-component bigint holding 0 instead.
 *
 * NOTE(review): only part of the result buffer is cleared before use (see
 * below); presumably callers discard the uncleared components -- confirm.
 */
 static bigint *partial_multiply(BI_CTX *ctx, bigint *bia, bigint *bib, 
        int inner_partial, int outer_partial)
 {
    /* n and t are the component counts of bia and bib respectively. */
    int i = 0, j, n = bia->size, t = bib->size;
    bigint *biR;
    comp carry;
    comp *sr, *sa, *sb;
    check(bia);
    check(bib);
    biR = alloc(ctx, n + t);
    sa = bia->comps;
    sb = bib->comps;
    sr = biR->comps;
    if (inner_partial)
    {
        /* Clear only the first inner_partial result components. */
        memset(sr, 0, inner_partial*COMP_BYTE_SIZE); 
    }
    else    /* outer partial */
    {
        if (n < outer_partial || t < outer_partial) /* should we bother? */
        {
            bi_free(ctx, bia);
            bi_free(ctx, bib);
            biR->comps[0] = 0;      /* return 0 */
            biR->size = 1;
            return biR;
        }
        /* Clear result components from index outer_partial to the end; the
         * components below outer_partial are left as alloc() returned them. */
        memset(&sr[outer_partial], 0, (n+t-outer_partial)*COMP_BYTE_SIZE);
    }
    /* One pass ("row") per component of bib. */
    do 
    {
        comp *a = sa;
        comp b = *sb++;
        long_comp tmp;
        int i_plus_j = i;   /* result index this row starts writing at */
        carry = 0;
        j = n;              /* number of bia components left to process */
        if (outer_partial && i_plus_j < outer_partial)
        {
            /* Skip the first (outer_partial - i) components of bia so that
             * this row starts at result index outer_partial.
             * NOTE(review): when n == outer_partial and i == 0 this sets j
             * to 0 and a to &sa[n]; the do/while below still runs its body
             * once and `--j != 0` then does not stop at the end of the
             * array. Confirm callers always pass n > outer_partial. */
            i_plus_j = outer_partial;
            a = &sa[outer_partial-i];
            j = n-(outer_partial-i);
        }
        do
        {
            if (inner_partial && i_plus_j >= inner_partial) 
            {
                break;
            }
            /* Multiply-accumulate in double precision: the low half goes
             * into the result component, the high half becomes the carry. */
            tmp = sr[i_plus_j] + ((long_comp)*a++)*b + carry;
            sr[i_plus_j++] = (comp)tmp;              /* downsize */
            carry = (comp)(tmp >> COMP_BIT_SIZE);
        } while (--j != 0);
        /* Store the final carry of the row. This is also reached after the
         * break above, in which case the carry lands at the index where
         * accumulation stopped. */
        sr[i_plus_j] = carry;
    } while (++i < t);
    bi_free(ctx, bia);
    bi_free(ctx, bib);
    return trim(biR);
 }
 /**
 * @brief Perform a single Barrett reduction.
 * @param ctx [in]  The bigint session context.
@ -1342,12 +1282,12 @@ bigint *bi_barrett(BI_CTX *ctx, bigint *bi)
    q1 = comp_right_shift(bi_clone(ctx, bi), k-1);
    /* do outer partial multiply */
-    q2 = partial_multiply(ctx, q1, ctx->bi_mu[mod_offset], 0, k-1); 
+    q2 = regular_multiply(ctx, q1, ctx->bi_mu[mod_offset], 0, k-1); 
    q3 = comp_right_shift(q2, k+1);
    r1 = comp_mod(bi, k+1);
    /* do inner partial multiply */
-    r2 = comp_mod(partial_multiply(ctx, q3, bim, k+1, 0), k+1);
+    r2 = comp_mod(regular_multiply(ctx, q3, bim, k+1, 0), k+1);
    r = bi_subtract(ctx, r1, r2, NULL);
    /* if (r >= m) r = r - m; */
--- a/crypto/bigint_impl.h
+++ b/crypto/bigint_impl.h
@ -41,10 +41,8 @@
 #define BIGINT_NUM_MODS     1    
 #endif
 //#define REGISTER_8          1
 /* Architecture specific functions for big ints */
-#if defined(REGISTER_8)
+#if defined(CONFIG_INTEGER_8BIT)
 #define COMP_RADIX          256U       /**< Max component + 1 */
 #define COMP_MAX            0xFFFFU/**< (Max dbl comp -1) */
 #define COMP_BIT_SIZE       8   /**< Number of bits in a component. */
@ -53,7 +51,7 @@
 typedef uint8_t comp;	        /**< A single precision component. */
 typedef uint16_t long_comp;     /**< A double precision component. */
 typedef int16_t slong_comp;     /**< A signed double precision component. */
-#elif defined(REGISTER_16)
+#elif defined(CONFIG_INTEGER_16BIT)
 #define COMP_RADIX          65536U       /**< Max component + 1 */
 #define COMP_MAX            0xFFFFFFFFU/**< (Max dbl comp -1) */
 #define COMP_BIT_SIZE       16  /**< Number of bits in a component. */
--- a/ssl/BigIntConfig.in
+++ b/ssl/BigIntConfig.in
@ -8,7 +8,7 @@ menu "BigInt Options"
 choice
    prompt "Reduction Algorithm"
-    default CONFIG_BIGINT_CLASSICAL
+    default CONFIG_BIGINT_BARRETT
 config CONFIG_BIGINT_CLASSICAL
    bool "Classical"
@ -21,9 +21,8 @@ config CONFIG_BIGINT_MONTGOMERY
    bool "Montgomery"
    help
        Montgomery uses simple addition and multiplication to achieve its
-        performance. In this implementation it is slower than classical, 
+        performance.  It has the limitation that 0 <= x, y < m, and so is not 
-        and it has the limitation that 0 <= x, y < m, and so is not used 
+        used when CRT is active.
        when CRT is active.
        This option will not be normally selected.
@ -31,9 +30,7 @@ config CONFIG_BIGINT_BARRETT
    bool "Barrett"
    help
        Barrett performs expensive precomputation before reduction and partial
-        multiplies for computational speed. It can't be used with some of the
+        multiplies for computational speed.
        calculations when CRT is used, and so defaults to classical when this
        occurs.
        It is about 40% faster than Classical/Montgomery with the expense of
        about 2kB, and so this option is normally selected.
@ -108,12 +105,9 @@ config CONFIG_BIGINT_SQUARE
    bool "Square Algorithm"
    default y
    help
-        Allow squaring to be used instead of a multiplication.
+        Allow squaring to be used instead of a multiplication. It uses
- 
+        1/2 of the standard multiplies to obtain its performance.  
-        Squaring is theoretically 50% faster than a standard multiply 
+        It gives a 20% speed improvement overall and so should be selected.
        (but is actually about 25% faster). 
        It gives a 20% speed improvement and so should be selected.
 config CONFIG_BIGINT_CHECK_ON
    bool "BigInt Integrity Checking"
@ -126,7 +120,26 @@ config CONFIG_BIGINT_CHECK_ON
        This option is only selected when developing and should normally be
        turned off.
 choice
    prompt "Integer Size"
    default CONFIG_INTEGER_32BIT
 config CONFIG_INTEGER_32BIT
    bool "32"
    help
        The native integer size is 32 bits or higher.
 config CONFIG_INTEGER_16BIT
    bool "16"
    help
        The native integer size is 16 bits.
 config CONFIG_INTEGER_8BIT
    bool "8"
    help
        The native integer size is 8 bits.
 endchoice
 endmenu
--- a/ssl/test/perf_bigint.c
+++ b/ssl/test/perf_bigint.c
@ -45,10 +45,11 @@
 int main(int argc, char *argv[])
 {
 #ifdef CONFIG_SSL_CERT_VERIFICATION
-    RSA_CTX *rsa_ctx;
+    RSA_CTX *rsa_ctx = NULL;
    BI_CTX *ctx;
    bigint *bi_data, *bi_res;
-    int diff, res = 1;
+    float diff;
    int res = 1;
    struct timeval tv_old, tv_new;
    const char *plaintext;
    uint8_t compare[MAX_KEY_BYTE_SIZE];
@ -84,7 +85,7 @@ int main(int argc, char *argv[])
    diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
                (tv_new.tv_usec-tv_old.tv_usec)/1000;
-    printf("512 bit decrypt time: %dms\n", diff/max_biggie);
+    printf("512 bit decrypt time: %.2fms\n", diff/max_biggie);
    TTY_FLUSH();
    bi_export(ctx, bi_res, compare, 64);
    RSA_free(rsa_ctx);
@ -100,6 +101,7 @@ int main(int argc, char *argv[])
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ*^";
    len = get_file("../ssl/test/axTLS.key_1024", &buf);
    rsa_ctx = NULL;
    asn1_get_private_key(buf, len, &rsa_ctx);
    ctx = rsa_ctx->bi_ctx;
    bi_data = bi_import(ctx, (uint8_t *)plaintext, strlen(plaintext));
@ -121,7 +123,7 @@ int main(int argc, char *argv[])
    diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
                (tv_new.tv_usec-tv_old.tv_usec)/1000;
-    printf("1024 bit decrypt time: %dms\n", diff/max_biggie);
+    printf("1024 bit decrypt time: %.2fms\n", diff/max_biggie);
    TTY_FLUSH();
    bi_export(ctx, bi_res, compare, 128);
    RSA_free(rsa_ctx);
@ -139,6 +141,7 @@ int main(int argc, char *argv[])
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ*^";
    len = get_file("../ssl/test/axTLS.key_2048", &buf);
    rsa_ctx = NULL;
    asn1_get_private_key(buf, len, &rsa_ctx);
    ctx = rsa_ctx->bi_ctx;
    bi_data = bi_import(ctx, (uint8_t *)plaintext, strlen(plaintext));
@ -159,7 +162,7 @@ int main(int argc, char *argv[])
    diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
                (tv_new.tv_usec-tv_old.tv_usec)/1000;
-    printf("2048 bit decrypt time: %dms\n", diff/max_biggie);
+    printf("2048 bit decrypt time: %.2fms\n", diff/max_biggie);
    TTY_FLUSH();
    bi_export(ctx, bi_res, compare, 256);
    RSA_free(rsa_ctx);
@ -181,6 +184,7 @@ int main(int argc, char *argv[])
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ*^";
    len = get_file("../ssl/test/axTLS.key_4096", &buf);
    rsa_ctx = NULL;
    asn1_get_private_key(buf, len, &rsa_ctx);
    ctx = rsa_ctx->bi_ctx;
    bi_data = bi_import(ctx, (uint8_t *)plaintext, strlen(plaintext));
@ -189,7 +193,7 @@ int main(int argc, char *argv[])
    gettimeofday(&tv_new, NULL);
    diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
                (tv_new.tv_usec-tv_old.tv_usec)/1000;
-    printf("4096 bit encrypt time: %dms\n", diff);
+    printf("4096 bit encrypt time: %.2fms\n", diff);
    TTY_FLUSH();
    bi_data = bi_res;   /* reuse again */
@ -208,7 +212,7 @@ int main(int argc, char *argv[])
    diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
                (tv_new.tv_usec-tv_old.tv_usec)/1000;
-    printf("4096 bit decrypt time: %dms\n", diff/max_biggie);
+    printf("4096 bit decrypt time: %.2fms\n", diff/max_biggie);
    TTY_FLUSH();
    bi_export(ctx, bi_res, compare, 512);
    RSA_free(rsa_ctx);
--- a/ssl/test/ssltest.c
+++ b/ssl/test/ssltest.c
@ -428,8 +428,8 @@ static int BIGINT_test(BI_CTX *ctx)
 {
    int res = 1;
-#ifndef REGISTER_8 
+#ifndef CONFIG_INTEGER_8BIT 
-#ifndef REGISTER_16
+#ifndef CONFIG_INTEGER_16BIT 
    bigint *bi_data, *bi_exp, *bi_res;
    const char *expnt, *plaintext, *mod;
    uint8_t compare[MAX_KEY_BYTE_SIZE];
--- a/www/index.html
+++ b/www/index.html