1
0
mirror of https://github.com/esp8266/Arduino.git synced 2025-04-25 20:02:37 +03:00

merged partial_multiply with regular_multiply function.

git-svn-id: svn://svn.code.sf.net/p/axtls/code/trunk@182 9a5d90b5-6617-0410-8a86-bb477d3ed2e3
This commit is contained in:
cameronrich 2011-01-02 08:30:53 +00:00
parent 0d2e75b9c7
commit 8c18da4f1e
6 changed files with 73 additions and 118 deletions

View File

@ -801,11 +801,16 @@ void bi_free_mod(BI_CTX *ctx, int mod_offset)
/**
* Perform a standard multiplication between two bigints.
*
* Barrett reduction has no need for some parts of the product, so ignore bits
* of the multiply. This routine gives Barrett its big performance
* improvements over Classical/Montgomery reduction methods.
*/
static bigint *regular_multiply(BI_CTX *ctx, bigint *bia, bigint *bib)
static bigint *regular_multiply(BI_CTX *ctx, bigint *bia, bigint *bib,
int inner_partial, int outer_partial)
{
int i, j, i_plus_j;
int n = bia->size;
int i = 0, j;
int n = bia->size;
int t = bib->size;
bigint *biR = alloc(ctx, n + t);
comp *sr = biR->comps;
@ -817,23 +822,33 @@ static bigint *regular_multiply(BI_CTX *ctx, bigint *bia, bigint *bib)
/* clear things to start with */
memset(biR->comps, 0, ((n+t)*COMP_BYTE_SIZE));
i = 0;
do
{
comp carry = 0;
comp b = *sb++;
i_plus_j = i;
int r_index = i;
j = 0;
if (outer_partial)
{
r_index = outer_partial-1;
j = outer_partial-i-1;
}
do
{
long_comp tmp = sr[i_plus_j] + (long_comp)sa[j]*b + carry;
sr[i_plus_j++] = (comp)tmp; /* downsize */
carry = (comp)(tmp >> COMP_BIT_SIZE);
if (inner_partial && r_index >= inner_partial)
{
break;
}
long_comp tmp = sr[r_index] + ((long_comp)sa[j])*b + carry;
sr[r_index++] = (comp)tmp; /* downsize */
carry = tmp >> COMP_BIT_SIZE;
} while (++j < n);
sr[i_plus_j] = carry;
sr[r_index] = carry;
} while (++i < t);
bi_free(ctx, bia);
@ -913,12 +928,12 @@ bigint *bi_multiply(BI_CTX *ctx, bigint *bia, bigint *bib)
#ifdef CONFIG_BIGINT_KARATSUBA
if (min(bia->size, bib->size) < MUL_KARATSUBA_THRESH)
{
return regular_multiply(ctx, bia, bib);
return regular_multiply(ctx, bia, bib, 0, 0);
}
return karatsuba(ctx, bia, bib, 0);
#else
return regular_multiply(ctx, bia, bib);
return regular_multiply(ctx, bia, bib, 0, 0);
#endif
}
@ -941,7 +956,7 @@ static bigint *regular_square(BI_CTX *ctx, bigint *bi)
long_comp tmp = w[2*i] + (long_comp)x[i]*x[i];
uint8_t c = 0;
w[2*i] = (comp)tmp;
carry = (comp)(tmp >> COMP_BIT_SIZE);
carry = tmp >> COMP_BIT_SIZE;
for (j = i+1; j < t; j++)
{
@ -1242,81 +1257,6 @@ static bigint *comp_mod(bigint *bi, int mod)
return bi;
}
/*
 * Multiply two bigints while computing only part of the product.
 *
 * Barrett reduction has no need for some parts of the product, so ignore bits
 * of the multiply. This routine gives Barrett its big performance
 * improvements over Classical/Montgomery reduction methods.
 *
 * @param ctx [in]  The bigint session context.
 * @param bia [in]  First operand. It is released with bi_free() before return.
 * @param bib [in]  Second operand. It is released with bi_free() before return.
 * @param inner_partial [in]  When non-zero, no partial product is accumulated
 *        into result components at index inner_partial or above.
 * @param outer_partial [in]  When non-zero, partial products that would land
 *        in result components below outer_partial are skipped. Carries out of
 *        the skipped columns are therefore not propagated, so the retained
 *        components are an approximation of the full product.
 * @return A bigint obtained from alloc() and passed through trim(). When
 *         inner_partial is zero and either operand has fewer than
 *         outer_partial components, a single-component zero is returned.
 */
static bigint *partial_multiply(BI_CTX *ctx, bigint *bia, bigint *bib,
        int inner_partial, int outer_partial)
{
    int i = 0, j, n = bia->size, t = bib->size;
    bigint *biR;
    comp carry;
    comp *sr, *sa, *sb;

    check(bia);
    check(bib);

    biR = alloc(ctx, n + t);
    sa = bia->comps;
    sb = bib->comps;
    sr = biR->comps;

    if (!inner_partial && (n < outer_partial || t < outer_partial))
    {
        /* should we bother? */
        bi_free(ctx, bia);
        bi_free(ctx, bib);
        biR->comps[0] = 0;      /* return 0 */
        biR->size = 1;
        return biR;
    }

    /*
     * Clear the whole result. Components outside the requested range are
     * never accumulated into, but trim() and the caller still read them, so
     * they must not be left uninitialised. Using n + t as the length also
     * keeps the clear inside the allocation whatever inner_partial is.
     */
    memset(sr, 0, (n+t)*COMP_BYTE_SIZE);

    do
    {
        comp *a = sa;
        comp b = *sb++;
        long_comp tmp;
        int i_plus_j = i;
        carry = 0;
        j = n;

        if (outer_partial && i_plus_j < outer_partial)
        {
            int skipped = outer_partial-i;  /* low columns of this row */

            if (skipped >= n)
            {
                /*
                 * Every column of this row lies below outer_partial, so the
                 * row contributes nothing. Entering the loop below with a
                 * column count of zero would run off the end of both bia and
                 * the result. (continue re-tests ++i < t.)
                 */
                continue;
            }

            i_plus_j = outer_partial;
            a = &sa[skipped];
            j = n-skipped;
        }

        while (j-- > 0)
        {
            if (inner_partial && i_plus_j >= inner_partial)
            {
                break;
            }

            tmp = sr[i_plus_j] + ((long_comp)*a++)*b + carry;
            sr[i_plus_j++] = (comp)tmp;              /* downsize */
            carry = (comp)(tmp >> COMP_BIT_SIZE);
        }

        /* store the final carry of this row */
        sr[i_plus_j] = carry;
    } while (++i < t);

    bi_free(ctx, bia);
    bi_free(ctx, bib);
    return trim(biR);
}
/**
* @brief Perform a single Barrett reduction.
* @param ctx [in] The bigint session context.
@ -1342,12 +1282,12 @@ bigint *bi_barrett(BI_CTX *ctx, bigint *bi)
q1 = comp_right_shift(bi_clone(ctx, bi), k-1);
/* do outer partial multiply */
q2 = partial_multiply(ctx, q1, ctx->bi_mu[mod_offset], 0, k-1);
q2 = regular_multiply(ctx, q1, ctx->bi_mu[mod_offset], 0, k-1);
q3 = comp_right_shift(q2, k+1);
r1 = comp_mod(bi, k+1);
/* do inner partial multiply */
r2 = comp_mod(partial_multiply(ctx, q3, bim, k+1, 0), k+1);
r2 = comp_mod(regular_multiply(ctx, q3, bim, k+1, 0), k+1);
r = bi_subtract(ctx, r1, r2, NULL);
/* if (r >= m) r = r - m; */

View File

@ -41,10 +41,8 @@
#define BIGINT_NUM_MODS 1
#endif
//#define REGISTER_8 1
/* Architecture specific functions for big ints */
#if defined(REGISTER_8)
#if defined(CONFIG_INTEGER_8BIT)
#define COMP_RADIX 256U /**< Max component + 1 */
#define COMP_MAX 0xFFFFU/**< (Max dbl comp -1) */
#define COMP_BIT_SIZE 8 /**< Number of bits in a component. */
@ -53,7 +51,7 @@
typedef uint8_t comp; /**< A single precision component. */
typedef uint16_t long_comp; /**< A double precision component. */
typedef int16_t slong_comp; /**< A signed double precision component. */
#elif defined(REGISTER_16)
#elif defined(CONFIG_INTEGER_16BIT)
#define COMP_RADIX 65536U /**< Max component + 1 */
#define COMP_MAX 0xFFFFFFFFU/**< (Max dbl comp -1) */
#define COMP_BIT_SIZE 16 /**< Number of bits in a component. */

View File

@ -8,7 +8,7 @@ menu "BigInt Options"
choice
prompt "Reduction Algorithm"
default CONFIG_BIGINT_CLASSICAL
default CONFIG_BIGINT_BARRETT
config CONFIG_BIGINT_CLASSICAL
bool "Classical"
@ -21,9 +21,8 @@ config CONFIG_BIGINT_MONTGOMERY
bool "Montgomery"
help
Montgomery uses simple addition and multiplication to achieve its
performance. In this implementation it is slower than classical,
and it has the limitation that 0 <= x, y < m, and so is not used
when CRT is active.
performance. It has the limitation that 0 <= x, y < m, and so is not
used when CRT is active.
This option will not normally be selected.
@ -31,9 +30,7 @@ config CONFIG_BIGINT_BARRETT
bool "Barrett"
help
Barrett performs expensive precomputation before reduction and partial
multiplies for computational speed. It can't be used with some of the
calculations when CRT is used, and so defaults to classical when this
occurs.
multiplies for computational speed.
It is about 40% faster than Classical/Montgomery at the expense of
about 2kB, and so this option is normally selected.
@ -108,12 +105,9 @@ config CONFIG_BIGINT_SQUARE
bool "Square Algorithm"
default y
help
Allow squaring to be used instead of a multiplication.
Squaring is theoretically 50% faster than a standard multiply
(but is actually about 25% faster).
It gives a 20% speed improvement and so should be selected.
Allow squaring to be used instead of a multiplication. It uses
1/2 of the standard multiplies to obtain its performance.
It gives a 20% speed improvement overall and so should be selected.
config CONFIG_BIGINT_CHECK_ON
bool "BigInt Integrity Checking"
@ -126,7 +120,26 @@ config CONFIG_BIGINT_CHECK_ON
This option is only selected when developing and should normally be
turned off.
choice
prompt "Integer Size"
default CONFIG_INTEGER_32BIT
config CONFIG_INTEGER_32BIT
bool "32"
help
The native integer size is 32 bits or higher.
config CONFIG_INTEGER_16BIT
bool "16"
help
The native integer size is 16 bits.
config CONFIG_INTEGER_8BIT
bool "8"
help
The native integer size is 8 bits.
endchoice
endmenu

View File

@ -45,10 +45,11 @@
int main(int argc, char *argv[])
{
#ifdef CONFIG_SSL_CERT_VERIFICATION
RSA_CTX *rsa_ctx;
RSA_CTX *rsa_ctx = NULL;
BI_CTX *ctx;
bigint *bi_data, *bi_res;
int diff, res = 1;
float diff;
int res = 1;
struct timeval tv_old, tv_new;
const char *plaintext;
uint8_t compare[MAX_KEY_BYTE_SIZE];
@ -84,7 +85,7 @@ int main(int argc, char *argv[])
diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
(tv_new.tv_usec-tv_old.tv_usec)/1000;
printf("512 bit decrypt time: %dms\n", diff/max_biggie);
printf("512 bit decrypt time: %.2fms\n", diff/max_biggie);
TTY_FLUSH();
bi_export(ctx, bi_res, compare, 64);
RSA_free(rsa_ctx);
@ -100,6 +101,7 @@ int main(int argc, char *argv[])
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ*^";
len = get_file("../ssl/test/axTLS.key_1024", &buf);
rsa_ctx = NULL;
asn1_get_private_key(buf, len, &rsa_ctx);
ctx = rsa_ctx->bi_ctx;
bi_data = bi_import(ctx, (uint8_t *)plaintext, strlen(plaintext));
@ -121,7 +123,7 @@ int main(int argc, char *argv[])
diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
(tv_new.tv_usec-tv_old.tv_usec)/1000;
printf("1024 bit decrypt time: %dms\n", diff/max_biggie);
printf("1024 bit decrypt time: %.2fms\n", diff/max_biggie);
TTY_FLUSH();
bi_export(ctx, bi_res, compare, 128);
RSA_free(rsa_ctx);
@ -139,6 +141,7 @@ int main(int argc, char *argv[])
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ*^";
len = get_file("../ssl/test/axTLS.key_2048", &buf);
rsa_ctx = NULL;
asn1_get_private_key(buf, len, &rsa_ctx);
ctx = rsa_ctx->bi_ctx;
bi_data = bi_import(ctx, (uint8_t *)plaintext, strlen(plaintext));
@ -159,7 +162,7 @@ int main(int argc, char *argv[])
diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
(tv_new.tv_usec-tv_old.tv_usec)/1000;
printf("2048 bit decrypt time: %dms\n", diff/max_biggie);
printf("2048 bit decrypt time: %.2fms\n", diff/max_biggie);
TTY_FLUSH();
bi_export(ctx, bi_res, compare, 256);
RSA_free(rsa_ctx);
@ -181,6 +184,7 @@ int main(int argc, char *argv[])
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ*^";
len = get_file("../ssl/test/axTLS.key_4096", &buf);
rsa_ctx = NULL;
asn1_get_private_key(buf, len, &rsa_ctx);
ctx = rsa_ctx->bi_ctx;
bi_data = bi_import(ctx, (uint8_t *)plaintext, strlen(plaintext));
@ -189,7 +193,7 @@ int main(int argc, char *argv[])
gettimeofday(&tv_new, NULL);
diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
(tv_new.tv_usec-tv_old.tv_usec)/1000;
printf("4096 bit encrypt time: %dms\n", diff);
printf("4096 bit encrypt time: %.2fms\n", diff);
TTY_FLUSH();
bi_data = bi_res; /* reuse again */
@ -208,7 +212,7 @@ int main(int argc, char *argv[])
diff = (tv_new.tv_sec-tv_old.tv_sec)*1000 +
(tv_new.tv_usec-tv_old.tv_usec)/1000;
printf("4096 bit decrypt time: %dms\n", diff/max_biggie);
printf("4096 bit decrypt time: %.2fms\n", diff/max_biggie);
TTY_FLUSH();
bi_export(ctx, bi_res, compare, 512);
RSA_free(rsa_ctx);

View File

@ -428,8 +428,8 @@ static int BIGINT_test(BI_CTX *ctx)
{
int res = 1;
#ifndef REGISTER_8
#ifndef REGISTER_16
#ifndef CONFIG_INTEGER_8BIT
#ifndef CONFIG_INTEGER_16BIT
bigint *bi_data, *bi_exp, *bi_res;
const char *expnt, *plaintext, *mod;
uint8_t compare[MAX_KEY_BYTE_SIZE];

File diff suppressed because one or more lines are too long