Commit 3b2639b9 authored by Matt Johnston's avatar Matt Johnston
Browse files

ltm 0.30 orig import

--HG--
branch : libtommath-orig
extra : convert_revision : 29392f1198b77a2021e78879c1489e84c9e98ef4
parents
LibTomMath is hereby released into the Public Domain.
-- Tom St Denis
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library was designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, [email protected], http://math.libtomcrypt.org
*/
#include <tommath.h>
static const struct {
int code;
char *msg;
} msgs[] = {
{ MP_OKAY, "Successful" },
{ MP_MEM, "Out of heap" },
{ MP_VAL, "Value out of range" }
};
/* return a char * string for a given code */
char *mp_error_to_string(int code)
{
int x;
/* scan the lookup table for the given message */
for (x = 0; x < (int)(sizeof(msgs) / sizeof(msgs[0])); x++) {
if (msgs[x].code == code) {
return msgs[x].msg;
}
}
/* generic reply for invalid code */
return "Invalid error code";
}
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library was designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, [email protected], http://math.libtomcrypt.org
*/
#include <tommath.h>
/* computes the modular inverse via binary extended euclidean algorithm,
* that is c = 1/a mod b
*
* Based on mp_invmod except this is optimized for the case where b is
* odd as per HAC Note 14.64 on pp. 610
*/
int
fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
{
mp_int x, y, u, v, B, D;
int res, neg;
/* 2. [modified] b must be odd */
if (mp_iseven (b) == 1) {
return MP_VAL;
}
/* init all our temps */
if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
return res;
}
/* x == modulus, y == value to invert */
if ((res = mp_copy (b, &x)) != MP_OKAY) {
goto __ERR;
}
/* we need y = |a| */
if ((res = mp_abs (a, &y)) != MP_OKAY) {
goto __ERR;
}
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
if ((res = mp_copy (&x, &u)) != MP_OKAY) {
goto __ERR;
}
if ((res = mp_copy (&y, &v)) != MP_OKAY) {
goto __ERR;
}
mp_set (&D, 1);
top:
/* 4. while u is even do */
while (mp_iseven (&u) == 1) {
/* 4.1 u = u/2 */
if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
goto __ERR;
}
/* 4.2 if B is odd then */
if (mp_isodd (&B) == 1) {
if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
goto __ERR;
}
}
/* B = B/2 */
if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
goto __ERR;
}
}
/* 5. while v is even do */
while (mp_iseven (&v) == 1) {
/* 5.1 v = v/2 */
if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
goto __ERR;
}
/* 5.2 if D is odd then */
if (mp_isodd (&D) == 1) {
/* D = (D-x)/2 */
if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
goto __ERR;
}
}
/* D = D/2 */
if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
goto __ERR;
}
}
/* 6. if u >= v then */
if (mp_cmp (&u, &v) != MP_LT) {
/* u = u - v, B = B - D */
if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
goto __ERR;
}
if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
goto __ERR;
}
} else {
/* v - v - u, D = D - B */
if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
goto __ERR;
}
if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
goto __ERR;
}
}
/* if not zero goto step 4 */
if (mp_iszero (&u) == 0) {
goto top;
}
/* now a = C, b = D, gcd == g*v */
/* if v != 1 then there is no inverse */
if (mp_cmp_d (&v, 1) != MP_EQ) {
res = MP_VAL;
goto __ERR;
}
/* b is now the inverse */
neg = a->sign;
while (D.sign == MP_NEG) {
if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
goto __ERR;
}
}
mp_exch (&D, c);
c->sign = neg;
res = MP_OKAY;
__ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
return res;
}
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library was designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, [email protected], http://math.libtomcrypt.org
*/
#include <tommath.h>
/* computes xR**-1 == x (mod N) via Montgomery Reduction
*
* This is an optimized implementation of mp_montgomery_reduce
* which uses the comba method to quickly calculate the columns of the
* reduction.
*
* Based on Algorithm 14.32 on pp.601 of HAC.
*/
int
fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
{
int ix, res, olduse;
mp_word W[MP_WARRAY];
/* get old used count */
olduse = x->used;
/* grow a as required */
if (x->alloc < n->used + 1) {
if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
return res;
}
}
/* first we have to get the digits of the input into
* an array of double precision words W[...]
*/
{
register mp_word *_W;
register mp_digit *tmpx;
/* alias for the W[] array */
_W = W;
/* alias for the digits of x*/
tmpx = x->dp;
/* copy the digits of a into W[0..a->used-1] */
for (ix = 0; ix < x->used; ix++) {
*_W++ = *tmpx++;
}
/* zero the high words of W[a->used..m->used*2] */
for (; ix < n->used * 2 + 1; ix++) {
*_W++ = 0;
}
}
/* now we proceed to zero successive digits
* from the least significant upwards
*/
for (ix = 0; ix < n->used; ix++) {
/* mu = ai * m' mod b
*
* We avoid a double precision multiplication (which isn't required)
* by casting the value down to a mp_digit. Note this requires
* that W[ix-1] have the carry cleared (see after the inner loop)
*/
register mp_digit mu;
mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
/* a = a + mu * m * b**i
*
* This is computed in place and on the fly. The multiplication
* by b**i is handled by offseting which columns the results
* are added to.
*
* Note the comba method normally doesn't handle carries in the
* inner loop In this case we fix the carry from the previous
* column since the Montgomery reduction requires digits of the
* result (so far) [see above] to work. This is
* handled by fixing up one carry after the inner loop. The
* carry fixups are done in order so after these loops the
* first m->used words of W[] have the carries fixed
*/
{
register int iy;
register mp_digit *tmpn;
register mp_word *_W;
/* alias for the digits of the modulus */
tmpn = n->dp;
/* Alias for the columns set by an offset of ix */
_W = W + ix;
/* inner loop */
for (iy = 0; iy < n->used; iy++) {
*_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
}
}
/* now fix carry for next digit, W[ix+1] */
W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
}
/* now we have to propagate the carries and
* shift the words downward [all those least
* significant digits we zeroed].
*/
{
register mp_digit *tmpx;
register mp_word *_W, *_W1;
/* nox fix rest of carries */
/* alias for current word */
_W1 = W + ix;
/* alias for next word, where the carry goes */
_W = W + ++ix;
for (; ix <= n->used * 2 + 1; ix++) {
*_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
}
/* copy out, A = A/b**n
*
* The result is A/b**n but instead of converting from an
* array of mp_word to mp_digit than calling mp_rshd
* we just copy them in the right order
*/
/* alias for destination word */
tmpx = x->dp;
/* alias for shifted double precision result */
_W = W + n->used;
for (ix = 0; ix < n->used + 1; ix++) {
*tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
}
/* zero oldused digits, if the input a was larger than
* m->used+1 we'll have to clear the digits
*/
for (; ix < olduse; ix++) {
*tmpx++ = 0;
}
}
/* set the max used and clamp */
x->used = n->used + 1;
mp_clamp (x);
/* if A >= m then A = A - m */
if (mp_cmp_mag (x, n) != MP_LT) {
return s_mp_sub (x, n, x);
}
return MP_OKAY;
}
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library was designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, [email protected], http://math.libtomcrypt.org
*/
#include <tommath.h>
/* Fast (comba) multiplier
*
* This is the fast column-array [comba] multiplier. It is
* designed to compute the columns of the product first
* then handle the carries afterwards. This has the effect
* of making the nested loops that compute the columns very
* simple and schedulable on super-scalar processors.
*
* This has been modified to produce a variable number of
* digits of output so if say only a half-product is required
* you don't have to compute the upper half (a feature
* required for fast Barrett reduction).
*
* Based on Algorithm 14.12 on pp.595 of HAC.
*
*/
int
fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
int olduse, res, pa, ix;
mp_word W[MP_WARRAY];
/* grow the destination as required */
if (c->alloc < digs) {
if ((res = mp_grow (c, digs)) != MP_OKAY) {
return res;
}
}
/* clear temp buf (the columns) */
memset (W, 0, sizeof (mp_word) * digs);
/* calculate the columns */
pa = a->used;
for (ix = 0; ix < pa; ix++) {
/* this multiplier has been modified to allow you to
* control how many digits of output are produced.
* So at most we want to make upto "digs" digits of output.
*
* this adds products to distinct columns (at ix+iy) of W
* note that each step through the loop is not dependent on
* the previous which means the compiler can easily unroll
* the loop without scheduling problems
*/
{
register mp_digit tmpx, *tmpy;
register mp_word *_W;
register int iy, pb;
/* alias for the the word on the left e.g. A[ix] * A[iy] */
tmpx = a->dp[ix];
/* alias for the right side */
tmpy = b->dp;
/* alias for the columns, each step through the loop adds a new
term to each column
*/
_W = W + ix;
/* the number of digits is limited by their placement. E.g.
we avoid multiplying digits that will end up above the # of
digits of precision requested
*/
pb = MIN (b->used, digs - ix);
for (iy = 0; iy < pb; iy++) {
*_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
}
}
}
/* setup dest */
olduse = c->used;
c->used = digs;
{
register mp_digit *tmpc;
/* At this point W[] contains the sums of each column. To get the
* correct result we must take the extra bits from each column and
* carry them down
*
* Note that while this adds extra code to the multiplier it
* saves time since the carry propagation is removed from the
* above nested loop.This has the effect of reducing the work
* from N*(N+N*c)==N**2 + c*N**2 to N**2 + N*c where c is the
* cost of the shifting. On very small numbers this is slower
* but on most cryptographic size numbers it is faster.
*
* In this particular implementation we feed the carries from
* behind which means when the loop terminates we still have one
* last digit to copy
*/
tmpc = c->dp;
for (ix = 1; ix < digs; ix++) {
/* forward the carry from the previous temp */
W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
/* now extract the previous digit [below the carry] */
*tmpc++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
}
/* fetch the last digit */
*tmpc++ = (mp_digit) (W[digs - 1] & ((mp_word) MP_MASK));
/* clear unused digits [that existed in the old copy of c] */
for (; ix < olduse; ix++) {
*tmpc++ = 0;
}
}
mp_clamp (c);
return MP_OKAY;
}
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library was designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, [email protected], http://math.libtomcrypt.org
*/
#include <tommath.h>
/* this is a modified version of fast_s_mp_mul_digs that only produces
* output digits *above* digs. See the comments for fast_s_mp_mul_digs
* to see how it works.
*
* This is used in the Barrett reduction since for one of the multiplications
* only the higher digits were needed. This essentially halves the work.
*
* Based on Algorithm 14.12 on pp.595 of HAC.
*/
int
fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
int oldused, newused, res, pa, pb, ix;
mp_word W[MP_WARRAY];
/* calculate size of product and allocate more space if required */
newused = a->used + b->used + 1;
if (c->alloc < newused) {
if ((res = mp_grow (c, newused)) != MP_OKAY) {
return res;
}
}
/* like the other comba method we compute the columns first */
pa = a->used;
pb = b->used;
memset (W + digs, 0, (pa + pb + 1 - digs) * sizeof (mp_word));
for (ix = 0; ix < pa; ix++) {
{
register mp_digit tmpx, *tmpy;
register int iy;
register mp_word *_W;
/* work todo, that is we only calculate digits that are at "digs" or above */
iy = digs - ix;
/* copy of word on the left of A[ix] * B[iy] */
tmpx = a->dp[ix];
/* alias for right side */
tmpy = b->dp + iy;
/* alias for the columns of output. Offset to be equal to or above the
* smallest digit place requested
*/
_W = W + digs;
/* skip cases below zero where ix > digs */
if (iy < 0) {
iy = abs(iy);
tmpy += iy;
_W += iy;
iy = 0;
}
/* compute column products for digits above the minimum */
for (; iy < pb; iy++) {
*_W++ += ((mp_word) tmpx) * ((mp_word)*tmpy++);
}
}
}
/* setup dest */
oldused = c->used;
c->used = newused;
/* now convert the array W downto what we need
*
* See comments in bn_fast_s_mp_mul_digs.c
*/
for (ix = digs + 1; ix < newused; ix++) {
W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
c->dp[ix - 1] = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
}
c->dp[newused - 1] = (mp_digit) (W[newused - 1] & ((mp_word) MP_MASK));
for (; ix < oldused; ix++) {
c->dp[ix] = 0;
}
mp_clamp (c);
return MP_OKAY;
}
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library was designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, [email protected], http://math.libtomcrypt.org
*/
#include <tommath.h>
/* fast squaring
*
* This is the comba method where the columns of the product
* are computed first then the carries are computed. This
* has the effect of making a very simple inner loop that
* is executed the most
*
* W2 represents the outer products and W the inner.
*
* A further optimizations is made because the inner
* products are of the form "A * B * 2". The *2 part does
* not need to be computed until the end which is good
* because 64-bit shifts are slow!
*
* Based on Algorithm 14.16 on pp.597 of HAC.
*
*/
int fast_s_mp_sqr (mp_int * a, mp_int * b)
{