import of libtommath 0.32
--HG-- branch : libtommath-orig extra : convert_revision : 0dc5b2d6d8c18b7e32b077cd8051bd63835afa1e
No preview for this file type
... | ... | @@ -49,7 +49,7 @@ |
\begin{document} | ||
\frontmatter | ||
\pagestyle{empty} | ||
\title{LibTomMath User Manual \\ v0.30} | ||
\title{LibTomMath User Manual \\ v0.32} | ||
\author{Tom St Denis \\ [email protected]} | ||
\maketitle | ||
This text, the library and the accompanying textbook are all hereby placed in the public domain. This book has been | ||
... | ... | @@ -96,27 +96,34 @@ LibTomMath is meant to be very ``GCC friendly'' as it comes with a makefile well |
also build in MSVC, Borland C out of the box. For any other ISO C compiler a makefile will have to be made by the end | ||
developer. | ||
To build the library for GCC simply issue the | ||
\subsection{Static Libraries} | ||
To build as a static library for GCC issue the following | ||
\begin{alltt} | ||
make | ||
\end{alltt} | ||
command. This will build the library and archive the object files in ``libtommath.a''. Now you simply link against that | ||
and include ``tommath.h'' within your programs. | ||
Alternatively to build with MSVC type | ||
command. This will build the library and archive the object files in ``libtommath.a''. Now you link against | ||
that and include ``tommath.h'' within your programs. Alternatively to build with MSVC issue the following | ||
\begin{alltt} | ||
nmake -f makefile.msvc | ||
\end{alltt} | ||
This will build the library and archive the object files in ``tommath.lib''. This has been tested with MSVC version 6.00 | ||
with service pack 5. | ||
This will build the library and archive the object files in ``tommath.lib''. This has been tested with MSVC | ||
version 6.00 with service pack 5. | ||
\subsection{Shared Libraries} | ||
To build as a shared library for GCC issue the following | ||
\begin{alltt} | ||
make -f makefile.shared | ||
\end{alltt} | ||
This requires the ``libtool'' package (common on most Linux/BSD systems). It will build LibTomMath as both shared | ||
and static then install (by default) into /usr/lib as well as install the header files in /usr/include. The shared | ||
library (resource) will be called ``libtommath.la'' while the static library is called ``libtommath.a''. Generally | ||
you use libtool to link your application against the shared object. | ||
There is limited support for making a ``DLL'' in windows via the ``makefile.cygwin\_dll'' makefile. It requires Cygwin | ||
to work with since it requires the auto-export/import functionality. The resulting DLL and imprt library ``libtomcrypt.dll.a'' | ||
can be used to link LibTomMath dynamically to any Windows program using Cygwin. | ||
There is limited support for making a ``DLL'' in windows via the ``makefile.cygwin\_dll'' makefile. It requires | ||
Cygwin to work with since it requires the auto-export/import functionality. The resulting DLL and import library | ||
``libtommath.dll.a'' can be used to link LibTomMath dynamically to any Windows program using Cygwin. | ||
\subsection{Testing} | ||
To build the library and the test harness type | ||
... | ... | @@ -144,6 +151,96 @@ This will output a row of numbers that are increasing. Each column is a differe |
that is being performed. The numbers represent how many times the test was invoked. If an error is detected the program | ||
will exit with a dump of the relevant numbers it was working with. | ||
\section{Build Configuration} | ||
LibTomMath can be configured at build time in three phases we shall call ``depends'', ``tweaks'' and ``trims''. | ||
Each phase changes how the library is built and they are applied one after another respectively. | ||
To make the system more powerful you can tweak the build process. Classes are defined in the file | ||
``tommath\_superclass.h''. By default, the symbol ``LTM\_ALL'' shall be defined which simply | ||
instructs the system to build all of the functions. This is how LibTomMath used to be packaged. This will give you | ||
access to every function LibTomMath offers. | ||
However, there are cases where such a build is not optimal. For instance, you want to perform RSA operations. You | ||
don't need the vast majority of the library to perform these operations. Aside from LTM\_ALL there is | ||
another pre--defined class ``SC\_RSA\_1'' which works in conjunction with the RSA from LibTomCrypt. Additional | ||
classes can be defined based on the needs of the user. | ||
\subsection{Build Depends} | ||
In the file tommath\_class.h you will see a large list of C ``defines'' followed by a series of ``ifdefs'' | ||
which further define symbols. All of the symbols (technically they're macros $\ldots$) represent a given C source | ||
file. For instance, BN\_MP\_ADD\_C represents the file ``bn\_mp\_add.c''. When a define has been enabled the | ||
function in the respective file will be compiled and linked into the library. Accordingly when the define | ||
is absent the file will not be compiled and not contribute any size to the library. | ||
You will also note that the header tommath\_class.h is actually recursively included (it includes itself twice). | ||
This is to help resolve as many dependencies as possible. In the last pass the symbol LTM\_LAST will be defined. | ||
This is useful for ``trims''. | ||
\subsection{Build Tweaks} | ||
A tweak is an algorithm ``alternative'', for example one that provides a tradeoff (usually between size and speed). | ||
They can be enabled at any pass of the configuration phase. | ||
\begin{small} | ||
\begin{center} | ||
\begin{tabular}{|l|l|} | ||
\hline \textbf{Define} & \textbf{Purpose} \\ | ||
\hline BN\_MP\_DIV\_SMALL & Enables a slower, smaller and equally \\ | ||
& functional mp\_div() function \\ | ||
\hline | ||
\end{tabular} | ||
\end{center} | ||
\end{small} | ||
\subsection{Build Trims} | ||
A trim is a way of removing functionality that is not required from a function. For instance, to perform | ||
RSA cryptography you only require exponentiation with odd moduli so even moduli support can be safely removed. | ||
Build trims are meant to be defined on the last pass of the configuration which means they are to be defined | ||
only if LTM\_LAST has been defined. | ||
\subsubsection{Moduli Related} | ||
\begin{small} | ||
\begin{center} | ||
\begin{tabular}{|l|l|} | ||
\hline \textbf{Restriction} & \textbf{Undefine} \\ | ||
\hline Exponentiation with odd moduli only & BN\_S\_MP\_EXPTMOD\_C \\ | ||
& BN\_MP\_REDUCE\_C \\ | ||
& BN\_MP\_REDUCE\_SETUP\_C \\ | ||
& BN\_S\_MP\_MUL\_HIGH\_DIGS\_C \\ | ||
& BN\_FAST\_S\_MP\_MUL\_HIGH\_DIGS\_C \\ | ||
\hline Exponentiation with random odd moduli & (The above plus the following) \\ | ||
& BN\_MP\_REDUCE\_2K\_C \\ | ||
& BN\_MP\_REDUCE\_2K\_SETUP\_C \\ | ||
& BN\_MP\_REDUCE\_IS\_2K\_C \\ | ||
& BN\_MP\_DR\_IS\_MODULUS\_C \\ | ||
& BN\_MP\_DR\_REDUCE\_C \\ | ||
& BN\_MP\_DR\_SETUP\_C \\ | ||
\hline Modular inverse odd moduli only & BN\_MP\_INVMOD\_SLOW\_C \\ | ||
\hline Modular inverse (both, smaller/slower) & BN\_FAST\_MP\_INVMOD\_C \\ | ||
\hline | ||
\end{tabular} | ||
\end{center} | ||
\end{small} | ||
\subsubsection{Operand Size Related} | ||
\begin{small} | ||
\begin{center} | ||
\begin{tabular}{|l|l|} | ||
\hline \textbf{Restriction} & \textbf{Undefine} \\ | ||
\hline Moduli $\le 2560$ bits & BN\_MP\_MONTGOMERY\_REDUCE\_C \\ | ||
& BN\_S\_MP\_MUL\_DIGS\_C \\ | ||
& BN\_S\_MP\_MUL\_HIGH\_DIGS\_C \\ | ||
& BN\_S\_MP\_SQR\_C \\ | ||
\hline Polynomial Schmolynomial & BN\_MP\_KARATSUBA\_MUL\_C \\ | ||
& BN\_MP\_KARATSUBA\_SQR\_C \\ | ||
& BN\_MP\_TOOM\_MUL\_C \\ | ||
& BN\_MP\_TOOM\_SQR\_C \\ | ||
\hline | ||
\end{tabular} | ||
\end{center} | ||
\end{small} | ||
\section{Purpose of LibTomMath} | ||
Unlike GNU MP (GMP) Library, LIP, OpenSSL or various other commercial kits (Miracl), LibTomMath was not written with | ||
bleeding edge performance in mind. First and foremost LibTomMath was written to be entirely open. Not only is the | ||
... | ... |
#include <tommath.h> | ||
#ifdef BN_ERROR_C | ||
/* LibTomMath, multiple-precision integer library -- Tom St Denis | ||
* | ||
* LibTomMath is a library that provides multiple-precision | ||
... | ... | @@ -12,7 +14,6 @@ |
* | ||
* Tom St Denis, [email protected], http://math.libtomcrypt.org | ||
*/ | ||
#include <tommath.h> | ||
static const struct { | ||
int code; | ||
... | ... | @@ -39,3 +40,4 @@ char *mp_error_to_string(int code) |
return "Invalid error code"; | ||
} | ||
#endif |
#include <tommath.h> | ||
#ifdef BN_FAST_MP_INVMOD_C | ||
/* LibTomMath, multiple-precision integer library -- Tom St Denis | ||
* | ||
* LibTomMath is a library that provides multiple-precision | ||
... | ... | @@ -12,12 +14,11 @@ |
* | ||
* Tom St Denis, [email protected], http://math.libtomcrypt.org | ||
*/ | ||
#include <tommath.h> | ||
/* computes the modular inverse via binary extended euclidean algorithm, | ||
* that is c = 1/a mod b | ||
* | ||
* Based on mp_invmod except this is optimized for the case where b is | ||
* Based on slow invmod except this is optimized for the case where b is | ||
* odd as per HAC Note 14.64 on pp. 610 | ||
*/ | ||
int | ||
... | ... | @@ -141,3 +142,4 @@ top: |
__ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); | ||
return res; | ||
} | ||
#endif |
#include <tommath.h> | ||
#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C | ||
/* LibTomMath, multiple-precision integer library -- Tom St Denis | ||
* | ||
* LibTomMath is a library that provides multiple-precision | ||
... | ... | @@ -12,11 +14,10 @@ |
* | ||
* Tom St Denis, [email protected], http://math.libtomcrypt.org | ||
*/ | ||
#include <tommath.h> | ||
/* computes xR**-1 == x (mod N) via Montgomery Reduction | ||
* | ||
* This is an optimized implementation of mp_montgomery_reduce | ||
* This is an optimized implementation of montgomery_reduce | ||
* which uses the comba method to quickly calculate the columns of the | ||
* reduction. | ||
* | ||
... | ... | @@ -165,3 +166,4 @@ fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) |
} | ||
return MP_OKAY; | ||
} | ||
#endif |
#include <tommath.h> | ||
#ifdef BN_FAST_S_MP_MUL_DIGS_C | ||
/* LibTomMath, multiple-precision integer library -- Tom St Denis | ||
* | ||
* LibTomMath is a library that provides multiple-precision | ||
... | ... | @@ -12,7 +14,6 @@ |
* | ||
* Tom St Denis, [email protected], http://math.libtomcrypt.org | ||
*/ | ||
#include <tommath.h> | ||
/* Fast (comba) multiplier | ||
* | ||
... | ... | @@ -33,8 +34,9 @@ |
int | ||
fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) | ||
{ | ||
int olduse, res, pa, ix; | ||
mp_word W[MP_WARRAY]; | ||
int olduse, res, pa, ix, iz; | ||
mp_digit W[MP_WARRAY]; | ||
register mp_word _W; | ||
/* grow the destination as required */ | ||
if (c->alloc < digs) { | ||
... | ... | @@ -43,82 +45,52 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) |
} | ||
} | ||
/* clear temp buf (the columns) */ | ||
memset (W, 0, sizeof (mp_word) * digs); | ||
/* number of output digits to produce */ | ||
pa = MIN(digs, a->used + b->used); | ||
/* calculate the columns */ | ||
pa = a->used; | ||
for (ix = 0; ix < pa; ix++) { | ||
/* this multiplier has been modified to allow you to | ||
* control how many digits of output are produced. | ||
* So at most we want to make upto "digs" digits of output. | ||
* | ||
* this adds products to distinct columns (at ix+iy) of W | ||
* note that each step through the loop is not dependent on | ||
* the previous which means the compiler can easily unroll | ||
* the loop without scheduling problems | ||
*/ | ||
{ | ||
register mp_digit tmpx, *tmpy; | ||
register mp_word *_W; | ||
register int iy, pb; | ||
/* clear the carry */ | ||
_W = 0; | ||
for (ix = 0; ix <= pa; ix++) { | ||
int tx, ty; | ||
int iy; | ||
mp_digit *tmpx, *tmpy; | ||
/* alias for the the word on the left e.g. A[ix] * A[iy] */ | ||
tmpx = a->dp[ix]; | ||
/* get offsets into the two bignums */ | ||
ty = MIN(b->used-1, ix); | ||
tx = ix - ty; | ||
/* alias for the right side */ | ||
tmpy = b->dp; | ||
/* alias for the columns, each step through the loop adds a new | ||
term to each column | ||
*/ | ||
_W = W + ix; | ||
/* setup temp aliases */ | ||
tmpx = a->dp + tx; | ||
tmpy = b->dp + ty; | ||
/* the number of digits is limited by their placement. E.g. | ||
we avoid multiplying digits that will end up above the # of | ||
digits of precision requested | ||
/* this is the number of times the loop will iterrate, essentially its | ||
while (tx++ < a->used && ty-- >= 0) { ... } | ||
*/ | ||
pb = MIN (b->used, digs - ix); | ||
iy = MIN(a->used-tx, ty+1); | ||
for (iy = 0; iy < pb; iy++) { | ||
*_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++); | ||
/* execute loop */ | ||
for (iz = 0; iz < iy; ++iz) { | ||
_W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); | ||
} | ||
} | ||
/* store term */ | ||
W[ix] = ((mp_digit)_W) & MP_MASK; | ||
/* make next carry */ | ||
_W = _W >> ((mp_word)DIGIT_BIT); | ||
} | ||
/* setup dest */ | ||
olduse = c->used; | ||
olduse = c->used; | ||
c->used = digs; | ||
{ | ||
register mp_digit *tmpc; | ||
/* At this point W[] contains the sums of each column. To get the | ||
* correct result we must take the extra bits from each column and | ||
* carry them down | ||
* | ||
* Note that while this adds extra code to the multiplier it | ||
* saves time since the carry propagation is removed from the | ||
* above nested loop.This has the effect of reducing the work | ||
* from N*(N+N*c)==N**2 + c*N**2 to N**2 + N*c where c is the | ||
* cost of the shifting. On very small numbers this is slower | ||
* but on most cryptographic size numbers it is faster. | ||
* | ||
* In this particular implementation we feed the carries from | ||
* behind which means when the loop terminates we still have one | ||
* last digit to copy | ||
*/ | ||
tmpc = c->dp; | ||
for (ix = 1; ix < digs; ix++) { | ||
/* forward the carry from the previous temp */ | ||
W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT)); | ||
for (ix = 0; ix < digs; ix++) { | ||
/* now extract the previous digit [below the carry] */ | ||
*tmpc++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK)); | ||
*tmpc++ = W[ix]; | ||
} | ||
/* fetch the last digit */ | ||
*tmpc++ = (mp_digit) (W[digs - 1] & ((mp_word) MP_MASK)); | ||
/* clear unused digits [that existed in the old copy of c] */ | ||
for (; ix < olduse; ix++) { | ||
... | ... | @@ -128,3 +100,4 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) |
mp_clamp (c); | ||
return MP_OKAY; | ||
} | ||
#endif |
#include <tommath.h> | ||
#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C | ||
/* LibTomMath, multiple-precision integer library -- Tom St Denis | ||
* | ||
* LibTomMath is a library that provides multiple-precision | ||
... | ... | @@ -12,10 +14,9 @@ |
* | ||
* Tom St Denis, [email protected], http://math.libtomcrypt.org | ||
*/ | ||
#include <tommath.h> | ||
/* this is a modified version of fast_s_mp_mul_digs that only produces | ||
* output digits *above* digs. See the comments for fast_s_mp_mul_digs | ||
/* this is a modified version of fast_s_mul_digs that only produces | ||
* output digits *above* digs. See the comments for fast_s_mul_digs | ||
* to see how it works. | ||
* | ||
* This is used in the Barrett reduction since for one of the multiplications | ||
... | ... | @@ -26,73 +27,69 @@ |
int | ||
fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) | ||
{ | ||
int oldused, newused, res, pa, pb, ix; | ||
mp_word W[MP_WARRAY]; | ||
int olduse, res, pa, ix, iz; | ||
mp_digit W[MP_WARRAY]; | ||
mp_word _W; | ||
/* calculate size of product and allocate more space if required */ | ||
newused = a->used + b->used + 1; | ||
if (c->alloc < newused) { | ||
if ((res = mp_grow (c, newused)) != MP_OKAY) { | ||
/* grow the destination as required */ | ||
pa = a->used + b->used; | ||
if (c->alloc < pa) { | ||
if ((res = mp_grow (c, pa)) != MP_OKAY) { | ||
return res; | ||
} | ||
} | ||
/* like the other comba method we compute the columns first */ | ||
pa = a->used; | ||
pb = b->used; | ||
memset (W + digs, 0, (pa + pb + 1 - digs) * sizeof (mp_word)); | ||
for (ix = 0; ix < pa; ix++) { | ||
{ | ||
register mp_digit tmpx, *tmpy; | ||
register int iy; | ||
register mp_word *_W; | ||
/* number of output digits to produce */ | ||
pa = a->used + b->used; | ||
_W = 0; | ||
for (ix = digs; ix <= pa; ix++) { | ||
int tx, ty, iy; | ||
mp_digit *tmpx, *tmpy; | ||
/* work todo, that is we only calculate digits that are at "digs" or above */ | ||
iy = digs - ix; | ||
/* get offsets into the two bignums */ | ||
ty = MIN(b->used-1, ix); | ||
tx = ix - ty; | ||
/* copy of word on the left of A[ix] * B[iy] */ | ||
tmpx = a->dp[ix]; | ||
/* setup temp aliases */ | ||
tmpx = a->dp + tx; | ||
tmpy = b->dp + ty; | ||
/* alias for right side */ | ||
tmpy = b->dp + iy; | ||
/* alias for the columns of output. Offset to be equal to or above the | ||
* smallest digit place requested | ||
/* this is the number of times the loop will iterrate, essentially its | ||
while (tx++ < a->used && ty-- >= 0) { ... } | ||
*/ | ||
_W = W + digs; | ||
/* skip cases below zero where ix > digs */ | ||
if (iy < 0) { | ||
iy = abs(iy); | ||
tmpy += iy; | ||
_W += iy; | ||
iy = 0; | ||
} | ||
iy = MIN(a->used-tx, ty+1); | ||
/* compute column products for digits above the minimum */ | ||
for (; iy < pb; iy++) { | ||
*_W++ += ((mp_word) tmpx) * ((mp_word)*tmpy++); | ||
/* execute loop */ | ||
for (iz = 0; iz < iy; iz++) { | ||
_W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); | ||
} | ||
} | ||
/* store term */ | ||
W[ix] = ((mp_digit)_W) & MP_MASK; | ||
/* make next carry */ | ||
_W = _W >> ((mp_word)DIGIT_BIT); | ||
} | ||
/* setup dest */ | ||
oldused = c->used; | ||
c->used = newused; | ||
olduse = c->used; | ||
c->used = pa; | ||
/* now convert the array W downto what we need | ||
* | ||
* See comments in bn_fast_s_mp_mul_digs.c | ||
*/ | ||
for (ix = digs + 1; ix < newused; ix++) { | ||