Merged partial_multiply into the regular_multiply function.

git-svn-id: svn://svn.code.sf.net/p/axtls/code/trunk@182 9a5d90b5-6617-0410-8a86-bb477d3ed2e3
2025-07-10 04:22:05 +03:00 · 2011-01-02 08:30:53 +00:00
parent 0d2e75b9c7
commit 8c18da4f1e
6 changed files with 73 additions and 118 deletions
--- a/crypto/bigint.c
+++ b/crypto/bigint.c
@ -801,11 +801,16 @@ void bi_free_mod(BI_CTX *ctx, int mod_offset)

 /** 
 * Perform a standard multiplication between two bigints.
+ *
+ * Barrett reduction does not need every part of the product, so parts of the
+ * multiply can be skipped (see inner_partial/outer_partial). This is what
+ * gives Barrett its big speed-up over Classical/Montgomery reduction.
 */
-static bigint *regular_multiply(BI_CTX *ctx, bigint *bia, bigint *bib)
+static bigint *regular_multiply(BI_CTX *ctx, bigint *bia, bigint *bib, 
+        int inner_partial, int outer_partial)
 {
-    int i, j, i_plus_j;
-    int n = bia->size; 
+    int i = 0, j;
+    int n = bia->size;
    int t = bib->size;
    bigint *biR = alloc(ctx, n + t);
    comp *sr = biR->comps;
@ -817,23 +822,33 @@ static bigint *regular_multiply(BI_CTX *ctx, bigint *bia, bigint *bib)

    /* clear things to start with */
    memset(biR->comps, 0, ((n+t)*COMP_BYTE_SIZE));
-    i = 0;

    do 
    {
        comp carry = 0;
        comp b = *sb++;
-        i_plus_j = i;
+        int r_index = i;
        j = 0;

+        if (outer_partial)
+        {
+            r_index = outer_partial-1;
+            j = outer_partial-i-1;
+        }
+
        do
        {
-            long_comp tmp = sr[i_plus_j] + (long_comp)sa[j]*b + carry;
-            sr[i_plus_j++] = (comp)tmp;              /* downsize */
-            carry = (comp)(tmp >> COMP_BIT_SIZE);
+            if (inner_partial && r_index >= inner_partial) 
+            {
+                break;
+            }
+
+            long_comp tmp = sr[r_index] + ((long_comp)sa[j])*b + carry;
+            sr[r_index++] = (comp)tmp;              /* downsize */
+            carry = tmp >> COMP_BIT_SIZE;
        } while (++j < n);

-        sr[i_plus_j] = carry;
+        sr[r_index] = carry;
    } while (++i < t);

    bi_free(ctx, bia);
@ -913,12 +928,12 @@ bigint *bi_multiply(BI_CTX *ctx, bigint *bia, bigint *bib)
 #ifdef CONFIG_BIGINT_KARATSUBA
    if (min(bia->size, bib->size) < MUL_KARATSUBA_THRESH)
    {
-        return regular_multiply(ctx, bia, bib);
+        return regular_multiply(ctx, bia, bib, 0, 0);
    }

    return karatsuba(ctx, bia, bib, 0);
 #else
-    return regular_multiply(ctx, bia, bib);
+    return regular_multiply(ctx, bia, bib, 0, 0);
 #endif
 }

@ -941,7 +956,7 @@ static bigint *regular_square(BI_CTX *ctx, bigint *bi)
        long_comp tmp = w[2*i] + (long_comp)x[i]*x[i];
        uint8_t c = 0;
        w[2*i] = (comp)tmp;
-        carry = (comp)(tmp >> COMP_BIT_SIZE);
+        carry = tmp >> COMP_BIT_SIZE;

        for (j = i+1; j < t; j++)
        {
@ -1242,81 +1257,6 @@ static bigint *comp_mod(bigint *bi, int mod)
    return bi;
 }

-/*
- * Barrett reduction has no need for some parts of the product, so ignore bits
- * of the multiply. This routine gives Barrett its big performance
- * improvements over Classical/Montgomery reduction methods. 
- */
-static bigint *partial_multiply(BI_CTX *ctx, bigint *bia, bigint *bib, 
-        int inner_partial, int outer_partial)
-{
-    int i = 0, j, n = bia->size, t = bib->size;
-    bigint *biR;
-    comp carry;
-    comp *sr, *sa, *sb;
-
-    check(bia);
-    check(bib);
-
-    biR = alloc(ctx, n + t);
-    sa = bia->comps;
-    sb = bib->comps;
-    sr = biR->comps;
-
-    if (inner_partial)
-    {
-        memset(sr, 0, inner_partial*COMP_BYTE_SIZE); 
-    }
-    else    /* outer partial */
-    {
-        if (n < outer_partial || t < outer_partial) /* should we bother? */
-        {
-            bi_free(ctx, bia);
-            bi_free(ctx, bib);
-            biR->comps[0] = 0;      /* return 0 */
-            biR->size = 1;
-            return biR;
-        }
-
-        memset(&sr[outer_partial], 0, (n+t-outer_partial)*COMP_BYTE_SIZE);
-    }
-
-    do 
-    {
-        comp *a = sa;
-        comp b = *sb++;
-        long_comp tmp;
-        int i_plus_j = i;
-        carry = 0;
-        j = n;
-
-        if (outer_partial && i_plus_j < outer_partial)
-        {
-            i_plus_j = outer_partial;
-            a = &sa[outer_partial-i];
-            j = n-(outer_partial-i);
-        }
-
-        do
-        {
-            if (inner_partial && i_plus_j >= inner_partial) 
-            {
-                break;
-            }
-
-            tmp = sr[i_plus_j] + ((long_comp)*a++)*b + carry;
-            sr[i_plus_j++] = (comp)tmp;              /* downsize */
-            carry = (comp)(tmp >> COMP_BIT_SIZE);
-        } while (--j != 0);
-
-        sr[i_plus_j] = carry;
-    } while (++i < t);
-
-    bi_free(ctx, bia);
-    bi_free(ctx, bib);
-    return trim(biR);
-}
-
 /**
 * @brief Perform a single Barrett reduction.
 * @param ctx [in]  The bigint session context.
@ -1342,12 +1282,12 @@ bigint *bi_barrett(BI_CTX *ctx, bigint *bi)
    q1 = comp_right_shift(bi_clone(ctx, bi), k-1);

    /* do outer partial multiply */
-    q2 = partial_multiply(ctx, q1, ctx->bi_mu[mod_offset], 0, k-1); 
+    q2 = regular_multiply(ctx, q1, ctx->bi_mu[mod_offset], 0, k-1); 
    q3 = comp_right_shift(q2, k+1);
    r1 = comp_mod(bi, k+1);

    /* do inner partial multiply */
-    r2 = comp_mod(partial_multiply(ctx, q3, bim, k+1, 0), k+1);
+    r2 = comp_mod(regular_multiply(ctx, q3, bim, k+1, 0), k+1);
    r = bi_subtract(ctx, r1, r2, NULL);

    /* if (r >= m) r = r - m; */
--- a/crypto/bigint_impl.h
+++ b/crypto/bigint_impl.h
@ -41,10 +41,8 @@
 #define BIGINT_NUM_MODS     1    
 #endif

-//#define REGISTER_8          1
-
 /* Architecture specific functions for big ints */
-#if defined(REGISTER_8)
+#if defined(CONFIG_INTEGER_8BIT)
 #define COMP_RADIX          256U       /**< Max component + 1 */
 #define COMP_MAX            0xFFFFU/**< (Max dbl comp -1) */
 #define COMP_BIT_SIZE       8   /**< Number of bits in a component. */
@ -53,7 +51,7 @@
 typedef uint8_t comp;	        /**< A single precision component. */
 typedef uint16_t long_comp;     /**< A double precision component. */
 typedef int16_t slong_comp;     /**< A signed double precision component. */
-#elif defined(REGISTER_16)
+#elif defined(CONFIG_INTEGER_16BIT)
 #define COMP_RADIX          65536U       /**< Max component + 1 */
 #define COMP_MAX            0xFFFFFFFFU/**< (Max dbl comp -1) */
 #define COMP_BIT_SIZE       16  /**< Number of bits in a component. */
--- a/ssl/BigIntConfig.in
+++ b/ssl/BigIntConfig.in
@ -8,7 +8,7 @@ menu "BigInt Options"

 choice
    prompt "Reduction Algorithm"
-    default CONFIG_BIGINT_CLASSICAL
+    default CONFIG_BIGINT_BARRETT

 config CONFIG_BIGINT_CLASSICAL
    bool "Classical"
@ -21,9 +21,8 @@ config CONFIG_BIGINT_MONTGOMERY
    bool "Montgomery"
    help
        Montgomery uses simple addition and multiplication to achieve its
-        performance. In this implementation it is slower than classical, 
-        and it has the limitation that 0 <= x, y < m, and so is not used 
-        when CRT is active.
+        performance.  It has the limitation that 0 <= x, y < m, and so is not 
+        used when CRT is active.

        This option will not be normally selected.

@ -31,9 +30,7 @@ config CONFIG_BIGINT_BARRETT
    bool "Barrett"
    help
        Barrett performs expensive precomputation before reduction and partial
-        multiplies for computational speed. It can't be used with some of the
-        calculations when CRT is used, and so defaults to classical when this
-        occurs.
+        multiplies for computational speed.

        It is about 40% faster than Classical/Montgomery with the expense of
        about 2kB, and so this option is normally selected.
@ -108,12 +105,9 @@ config CONFIG_BIGINT_SQUARE
    bool "Square Algorithm"
    default y
    help
-        Allow squaring to be used instead of a multiplication.
- 
-        Squaring is theoretically 50% faster than a standard multiply 
-        (but is actually about 25% faster). 
-
-        It gives a 20% speed improvement and so should be selected.
+        Allow squaring to be used instead of a multiplication. Squaring
+        needs only half of the multiplies of a standard multiplication.
+        It gives a 20% speed improvement overall and so should be selected.

 config CONFIG_BIGINT_CHECK_ON
    bool "BigInt Integrity Checking"
@ -126,7 +120,26 @@ config CONFIG_BIGINT_CHECK_ON
        This option is only selected when developing and should normally be
        turned off.

+choice
+    prompt "Integer Size"
+    default CONFIG_INTEGER_32BIT
+
+config CONFIG_INTEGER_32BIT
+    bool "32"
+    help
+        The native integer size is 32 bits or higher.
+        
+
+config CONFIG_INTEGER_16BIT
+    bool "16"
+    help
+        The native integer size is 16 bits.
+
+config CONFIG_INTEGER_8BIT
+    bool "8"
+    help
+        The native integer size is 8 bits.
+
+endchoice
 endmenu

-
-
--- a/ssl/test/perf_bigint.c
+++ b/ssl/test/perf_bigint.c
@ -45,10 +45,11 @@
 int main(int argc, char *argv[])
 {
 #ifdef CONFIG_SSL_CERT_VERIFICATION
-    RSA_CTX *rsa_ctx;
+    RSA_CTX *rsa_ctx = NULL;
    BI_CTX *ctx;
    bigint *bi_data, *bi_res;
-    int diff, res = 1;
+    float diff;
+    int res = 1;
    struct timeval tv_old, tv_new;
    const char *plaintext;
    uint8_t compare[MAX_KEY_BYTE_SIZE];
@ -84,7 +85,7 @@ int main(int argc, char *argv[])

    diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
                (tv_new.tv_usec-tv_old.tv_usec)/1000;
-    printf("512 bit decrypt time: %dms\n", diff/max_biggie);
+    printf("512 bit decrypt time: %.2fms\n", diff/max_biggie);
    TTY_FLUSH();
    bi_export(ctx, bi_res, compare, 64);
    RSA_free(rsa_ctx);
@ -100,6 +101,7 @@ int main(int argc, char *argv[])
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ*^";

    len = get_file("../ssl/test/axTLS.key_1024", &buf);
+    rsa_ctx = NULL;
    asn1_get_private_key(buf, len, &rsa_ctx);
    ctx = rsa_ctx->bi_ctx;
    bi_data = bi_import(ctx, (uint8_t *)plaintext, strlen(plaintext));
@ -121,7 +123,7 @@ int main(int argc, char *argv[])

    diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
                (tv_new.tv_usec-tv_old.tv_usec)/1000;
-    printf("1024 bit decrypt time: %dms\n", diff/max_biggie);
+    printf("1024 bit decrypt time: %.2fms\n", diff/max_biggie);
    TTY_FLUSH();
    bi_export(ctx, bi_res, compare, 128);
    RSA_free(rsa_ctx);
@ -139,6 +141,7 @@ int main(int argc, char *argv[])
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ*^";

    len = get_file("../ssl/test/axTLS.key_2048", &buf);
+    rsa_ctx = NULL;
    asn1_get_private_key(buf, len, &rsa_ctx);
    ctx = rsa_ctx->bi_ctx;
    bi_data = bi_import(ctx, (uint8_t *)plaintext, strlen(plaintext));
@ -159,7 +162,7 @@ int main(int argc, char *argv[])

    diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
                (tv_new.tv_usec-tv_old.tv_usec)/1000;
-    printf("2048 bit decrypt time: %dms\n", diff/max_biggie);
+    printf("2048 bit decrypt time: %.2fms\n", diff/max_biggie);
    TTY_FLUSH();
    bi_export(ctx, bi_res, compare, 256);
    RSA_free(rsa_ctx);
@ -181,6 +184,7 @@ int main(int argc, char *argv[])
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ*^";

    len = get_file("../ssl/test/axTLS.key_4096", &buf);
+    rsa_ctx = NULL;
    asn1_get_private_key(buf, len, &rsa_ctx);
    ctx = rsa_ctx->bi_ctx;
    bi_data = bi_import(ctx, (uint8_t *)plaintext, strlen(plaintext));
@ -189,7 +193,7 @@ int main(int argc, char *argv[])
    gettimeofday(&tv_new, NULL);
    diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
                (tv_new.tv_usec-tv_old.tv_usec)/1000;
-    printf("4096 bit encrypt time: %dms\n", diff);
+    printf("4096 bit encrypt time: %.2fms\n", diff);
    TTY_FLUSH();
    bi_data = bi_res;   /* reuse again */

@ -208,7 +212,7 @@ int main(int argc, char *argv[])

    diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
                (tv_new.tv_usec-tv_old.tv_usec)/1000;
-    printf("4096 bit decrypt time: %dms\n", diff/max_biggie);
+    printf("4096 bit decrypt time: %.2fms\n", diff/max_biggie);
    TTY_FLUSH();
    bi_export(ctx, bi_res, compare, 512);
    RSA_free(rsa_ctx);
--- a/ssl/test/ssltest.c
+++ b/ssl/test/ssltest.c
@ -428,8 +428,8 @@ static int BIGINT_test(BI_CTX *ctx)
 {
    int res = 1;

-#ifndef REGISTER_8 
-#ifndef REGISTER_16
+#ifndef CONFIG_INTEGER_8BIT 
+#ifndef CONFIG_INTEGER_16BIT 
    bigint *bi_data, *bi_exp, *bi_res;
    const char *expnt, *plaintext, *mod;
    uint8_t compare[MAX_KEY_BYTE_SIZE];
--- a/www/index.html
+++ b/www/index.html