bignumimpl.c revision f56c1286e5113aa46bd6e723da14d30c123153f2
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#define big_div_pos_fast big_div_pos
#include "bignum.h"
/*
* Configuration guide
* -------------------
*
* There are 4 preprocessor symbols used to configure the bignum
* implementation. This file contains no logic to configure based on
* processor; we leave that to the Makefiles to specify.
*
* USE_FLOATING_POINT
* Meaning: There is support for a fast floating-point implementation of
* Montgomery multiply.
*
* PSR_MUL
* Meaning: There are processor-specific versions of the low level
* functions to implement big_mul. Those functions are: big_mul_set_vec,
* big_mul_add_vec, big_mul_vec, and big_sqr_vec. PSR_MUL implies support
* for all 4 functions. You cannot pick and choose which subset of these
* functions to support; that would lead to a rat's nest of #ifdefs.
*
* HWCAP
* Meaning: Call multiply support functions through a function pointer.
* On x86, there are multiple implementations for different hardware
* capabilities, such as MMX, SSE2, etc. Tests are made at run-time, when
* a function is first used. So, the support functions are called through
* a function pointer. There is no need for that on Sparc, because there
* is only one implementation; support functions are called directly.
* Later, if there were some new VIS instruction, or something, and a
* run-time test were needed, rather than variant kernel modules and
* libraries, then HWCAP would be defined for Sparc, as well.
*
* UMUL64
* Meaning: It is safe to use generic C code that assumes the existence
* of a 32 x 32 --> 64 bit unsigned multiply. If this is not defined,
* then the generic code for big_mul_add_vec() must necessarily be very slow,
* because it must fall back to using 16 x 16 --> 32 bit multiplication.
*
*/
#ifdef _KERNEL
void *
{
void *rv;
return (rv);
}
#else /* _KERNEL */
#include <stdlib.h>
#include <stdio.h>
#ifndef MALLOC_DEBUG
#else
void
{
}
void *
{
void *rv;
return (rv);
}
#endif /* MALLOC_DEBUG */
#define big_realloc(x, y, z) realloc((x), (z))
void
{
int i;
for (i = a->len - 1; i >= 0; i--) {
if ((i % 8 == 0) && (i != 0))
(void) printf("\n");
}
(void) printf("\n");
}
#endif /* _KERNEL */
/* size in 32-bit words */
{
return (BIG_NO_MEM);
}
return (BIG_OK);
}
/* size in 32-bit words */
{
return (BIG_NO_MEM);
}
} else {
}
return (BIG_OK);
}
void
{
}
}
/*
* bn->size should be at least (len + 3) / 4
* converts from byte-big-endian format to bignum format (words in
* little endian order, but bytes within the words big endian)
*/
void
{
int i, j, offs;
#ifdef _LP64
/* LINTED */
/* LINTED */
/* LINTED */
#else /* !_LP64 */
#endif /* _LP64 */
for (j = 1; j < sizeof (uint32_t); j++) {
}
}
if (offs > 0) {
}
}
}
/*
* copies the least significant len bytes if
* len < bn->len * sizeof (uint32_t)
* converts from bignum format to byte-big-endian format.
* bignum format is words in little endian order,
* but bytes within words in native byte order.
*/
void
{
int i, j, offs;
#ifdef _LP64
/* LINTED */
#else /* !_LP64 */
#endif /* _LP64 */
for (j = 0; j < sizeof (uint32_t); j++) {
word & 0xff;
}
}
#ifdef _LP64
/* LINTED */
#else /* !_LP64 */
#endif /* _LP64 */
if (offs > 0) {
#ifdef _LP64
/* LINTED */
i > 0; i --) {
#else /* !_LP64 */
#endif /* _LP64 */
}
}
} else {
for (j = 0; j < sizeof (uint32_t); j++) {
word & 0xff;
}
}
#ifdef _LP64
/* LINTED */
i++) {
#else /* !_LP64 */
#endif /* _LP64 */
kn[i] = 0;
}
}
}
int
big_bitlength(BIGNUM *a)
{
	int	topword;
	int	bits;
	uint32_t w;

	/*
	 * Find the index of the most significant non-zero word.
	 * If the number is zero, topword stops at 0.
	 */
	topword = a->len - 1;
	while ((topword > 0) && (a->value[topword] == 0))
		topword--;

	/*
	 * Count the significant bits in that word by shifting it up
	 * until its top bit is set.  An all-zero word yields 1, so a
	 * zero bignum reports a bit length of 1, as before.
	 */
	w = a->value[topword];
	bits = sizeof (uint32_t) * 8;
	while ((bits > 1) && ((w & 0x80000000) == 0)) {
		w <<= 1;
		bits--;
	}

	return (topword * (int)sizeof (uint32_t) * 8 + bits);
}
{
int i, len;
len--;
} else {
}
return (BIG_NO_MEM);
}
return (BIG_OK);
}
{
int i;
return (BIG_OK);
} else {
}
}
}
return (BIG_NO_MEM);
return (BIG_OK);
}
/*
 * Returns 1 if every word of n is zero, 0 otherwise.
 *
 * Bug fix: the previous body returned from inside the loop on the
 * first iteration without ever testing n->value[i], so any non-empty
 * bignum was reported as zero.
 */
int
big_is_zero(BIGNUM *n)
{
	int	i;

	for (i = 0; i < n->len; i++) {
		if (n->value[i] != 0)
			return (0);
	}
	return (1);
}
{
uint32_t *r, *a, *b, *c;
} else {
}
return (err);
}
cy = 0;
for (i = 0; i < shorter; i++) {
ai = a[i];
}
for (; i < longer; i++) {
ai = c[i];
}
if (cy == 1) {
r[i] = cy;
} else {
}
return (BIG_OK);
}
/* caller must make sure that result has at least len words allocated */
void
{
int i;
cy = 1;
for (i = 0; i < len; i++) {
ai = a[i];
}
}
/* result=aa-bb it is assumed that aa>=bb */
{
int i, shorter;
uint32_t *r, *a, *b;
return (err);
}
cy = 1;
for (i = 0; i < shorter; i++) {
ai = a[i];
}
ai = a[i];
}
if (cy == 0)
return (BIG_INVALID_ARGS);
else
return (BIG_OK);
}
/* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */
int
{
int i;
return (1);
}
return (-1);
}
for (; i >= 0; i--) {
return (1);
return (-1);
}
return (0);
}
{
return (err);
return (err);
return (err);
} else {
return (err);
}
} else {
return (err);
} else {
return (err);
}
}
return (BIG_OK);
}
{
return (err);
return (err);
return (err);
} else {
return (err);
}
} else {
return (err);
} else {
return (err);
}
}
return (BIG_OK);
}
/* result = aa/2 aa must be positive */
{
int i;
uint32_t *a, *r;
return (err);
}
cy = 0;
cy1 = a[i] << 31;
r[i] = (cy|(a[i] >> 1));
}
return (BIG_OK);
}
/* result = aa*2 aa must be positive */
{
int i, rsize;
uint32_t *a, *r;
return (err);
}
cy = 0;
cy1 = a[i] >> 31;
r[i] = (cy | (a[i] << 1));
}
return (BIG_OK);
}
/* returns aa mod b, aa must be nonneg, b must be a max 16-bit integer */
{
int i;
return (0);
}
return (rem);
}
/*
* result = aa - (2^32)^lendiff * bb
* result->size should be at least aa->len at entry
* aa, bb, and result should be positive
*/
void
{
int i, lendiff;
for (i = 0; i < lendiff; i++) {
}
}
}
/*
* returns 1, 0, or -1 depending on whether |aa| > , ==, or <
* (2^32)^lendiff * |bb|
* aa->len should be >= bb->len
*/
int
{
int lendiff;
}
/*
* result = aa * b where b is a max. 16-bit positive integer.
* result should have enough space allocated.
*/
void
{
int i;
uint32_t *a, *r;
cy = 0;
ai = a[i];
}
r[i] = cy;
}
/*
* result = aa * b * 2^16 where b is a max. 16-bit positive integer.
* result should have enough space allocated.
*/
void
{
int i;
uint32_t *a, *r;
cy = 0;
ri = 0;
ai = a[i];
}
}
/* it is assumed that result->size is big enough */
void
{
int i;
if (offs == 0) {
}
return;
}
cy = 0;
}
if (cy != 0) {
} else {
}
}
/* it is assumed that result->size is big enough */
void
{
int i;
if (offs == 0) {
}
return;
}
}
}
/*
* it is assumed that aa and bb are positive
*/
{
uint32_t *a, *b;
if ((blen == 1) && (b[0] == 0))
return (BIG_DIV_BY_0);
return (err);
}
return (BIG_OK);
}
return (err);
goto ret1;
goto ret2;
goto ret3;
goto ret4;
offs = 0;
if (blen > 1) {
} else {
}
if (highb64 >= 0x1000000000000ull) {
offs = 16;
}
while ((highb64 & 0x800000000000ull) == 0) {
offs++;
}
#ifdef _LP64
/* LINTED */
#else /* !_LP64 */
#endif /* _LP64 */
if (offs <= 15) {
} else {
}
} else {
}
for (i = 0; i < rlen; i++) {
coeff++;
}
tlen--;
coeff++;
}
coeff++;
}
}
goto ret;
ret:
ret4:
big_finish(&tmp1);
ret3:
big_finish(&tmp2);
ret2:
big_finish(&bbhigh);
ret1:
big_finish(&bblow);
return (err);
}
/*
* If there is no processor-specific integer implementation of
* the lower level multiply functions, then this code is provided
* for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
* big_sqr_vec().
*
* There are two generic implementations. One that assumes that
* there is hardware and C compiler support for a 32 x 32 --> 64
* bit unsigned multiply, but otherwise is not specific to any
* processor, platform, or ISA.
*
* The other makes very few assumptions about hardware capabilities.
* It does not even assume that there is any implementation of a
* 32 x 32 --> 64 bit multiply that is accessible to C code and
* appropriate to use. It falls back to constructing 32 x 32 --> 64 bit
* multiplies from 16 x 16 --> 32 bit multiplies.
*
*/
#if !defined(PSR_MUL)
#ifdef UMUL64
#define UNROLL8
#define MUL_SET_VEC_ROUND_PREFETCH(R) \
p = pf * d; \
t = p + cy; \
r[R] = (uint32_t)t; \
cy = t >> 32
#define MUL_SET_VEC_ROUND_NOPREFETCH(R) \
p = pf * d; \
t = p + cy; \
r[R] = (uint32_t)t; \
cy = t >> 32
#define MUL_ADD_VEC_ROUND_PREFETCH(R) \
t = (uint64_t)r[R]; \
p = pf * d; \
t = p + t + cy; \
r[R] = (uint32_t)t; \
cy = t >> 32
#define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
t = (uint64_t)r[R]; \
p = pf * d; \
t = p + t + cy; \
r[R] = (uint32_t)t; \
cy = t >> 32
#ifdef UNROLL8
#define UNROLL 8
/*
* r = a * b
* where r and a are vectors; b is a single 32-bit digit
*/
{
if (len == 0)
return (0);
cy = 0;
d = (uint64_t)b;
r += UNROLL;
a += UNROLL;
}
}
while (len > 1) {
++r;
++a;
--len;
}
if (len > 0) {
}
}
/*
* r += a * b
* where r and a are vectors; b is a single 32-bit digit
*/
{
if (len == 0)
return (0);
cy = 0;
d = (uint64_t)b;
while (len > 8) {
r += 8;
a += 8;
len -= 8;
}
if (len == 8) {
}
while (len > 1) {
++r;
++a;
--len;
}
if (len > 0) {
}
}
#endif /* UNROLL8 */
void
{
uint32_t d;
tr = r + 1;
ta = a;
while (--tlen > 0) {
tr += 2;
++ta;
}
s = (uint64_t)a[0];
s = s * s;
r[0] = (uint32_t)s;
cy = s >> 32;
r[1] = (uint32_t)p;
cy = p >> 32;
row = 1;
col = 2;
s = s * s;
t = p + s;
d = (uint32_t)t;
break;
cy = p >> 32;
++row;
col += 2;
}
}
#else /* ! UMUL64 */
/*
* r = r + a * digit, r and a are vectors of length len
* returns the carry digit
*/
{
int i;
cy1 = 0;
for (i = 0; i < len; i++) {
}
cy1 = r[0] & 0xffff;
for (i = 0; i < len - 1; i++) {
}
return (retcy);
}
/*
* r = a * digit, r and a are vectors of length len
* returns the carry digit
*/
{
}
void
{
int i;
for (i = 1; i < len; ++i)
}
#endif /* UMUL64 */
void
{
int i;
for (i = 1; i < blen; ++i)
}
#endif /* ! PSR_MUL */
/*
* result = aa * bb result->value should be big enough to hold the result
*
* Implementation: Standard grammar school algorithm
*
*/
{
uint32_t *r, *t, *a, *b;
diff = 0;
} else {
if (diff < 0) {
}
}
return (err);
/* aa or bb might be an alias to result */
}
r[0] = 0;
return (BIG_OK);
}
for (i = 0; i < blen; i++) r[i] = b[i];
return (BIG_OK);
}
for (i = 0; i < alen; i++) r[i] = a[i];
return (BIG_OK);
}
return (err);
for (i = 0; i < rsize; i++) t[i] = 0;
BIG_SQR_VEC(t, a, alen);
else if (blen > 0)
if (t[rsize - 1] == 0)
--rsize;
return (err);
return (BIG_OK);
}
/*
* caller must ensure that a < n, b < n and ret->size >= 2 * n->len + 1
* and that ret is not n
*/
{
int i, j, nlen, needsubtract;
return (err);
for (i = 0; i < nlen; i++) {
j = i + nlen;
rr[j] += c;
while (rr[j] < c) {
j++;
c = 1;
}
}
needsubtract = 0;
needsubtract = 1;
else {
needsubtract = 1;
break;
}
}
if (needsubtract)
else {
for (i = 0; i < nlen; i++)
}
return (BIG_OK);
}
{
int i;
result = 0;
tmp = 0xffffffff;
for (i = 0; i < 32; i++) {
}
return (result);
}
/*
 * Returns the number of significant bits in n (0 for a zero bignum).
 */
int
big_numbits(BIGNUM *n)
{
	int	word, bit;
	uint32_t v;

	/* locate the highest non-zero word; settle on word 0 if none */
	for (word = n->len - 1; word > 0; word--) {
		if (n->value[word] != 0)
			break;
	}

	/* scan that word from the top bit down for the first set bit */
	v = n->value[word];
	for (bit = 32; bit > 0; bit--) {
		if ((v & 0x80000000) != 0)
			return (32 * word + bit);
		v <<= 1;
	}

	/* no bit set anywhere: the bignum is zero */
	return (0);
}
/* caller must make sure that a < n */
{
int len, i;
return (err);
goto ret;
ret:
return (err);
}
/* caller must make sure that a < n */
{
int len, i;
!= BIG_OK)
return (err);
goto ret;
}
goto ret;
ret:
return (err);
}
#define MAX_EXP_BIT_GROUP_SIZE 6
#ifdef USE_FLOATING_POINT
/*
* This version makes use of floating point for performance.
*/
static BIG_ERR_CODE
{
double dn0;
double *apowers[APOWERS_MAX_SIZE];
nbits = big_numbits(e);
if (nbits < 50) {
groupbits = 1;
apowerssize = 1;
} else {
}
return (err);
goto ret1;
goto ret2;
goto ret2;
}
if (big_cmp_abs(a, n) > 0) {
goto ret2;
} else {
}
goto ret3;
goto ret3;
for (i = 0; i < apowerssize; i++) {
}
err = BIG_NO_MEM;
goto ret;
}
err = BIG_NO_MEM;
goto ret;
}
err = BIG_NO_MEM;
goto ret;
}
err = BIG_NO_MEM;
goto ret;
}
err = BIG_NO_MEM;
goto ret;
}
err = BIG_NO_MEM;
goto ret;
}
for (i = 0; i < apowerssize; i++) {
sizeof (double))) == NULL) {
err = BIG_NO_MEM;
goto ret;
}
}
for (i = 1; i < apowerssize; i++) {
}
k = 0;
l = 0;
p = 0;
bitcount = 0;
for (i = nbits / 32; i >= 0; i--) {
for (j = bitind - 1; j >= 0; j--) {
} else {
bitcount++;
p = p * 2 + bit;
if (bit == 1) {
k = k + l + 1;
l = 0;
} else {
l++;
}
for (m = 0; m < k; m++) {
}
apowers[p >> (l+1)],
for (m = 0; m < l; m++) {
}
k = 0;
l = 0;
p = 0;
bitcount = 0;
}
}
}
bitind = 32;
}
for (m = 0; m < k; m++) {
}
if (p != 0) {
}
for (m = 0; m < l; m++) {
}
goto ret;
ret:
for (i = apowerssize - 1; i >= 0; i--) {
}
ret3:
big_finish(&rr);
ret2:
big_finish(&tmp);
ret1:
big_finish(&ma);
return (err);
}
#ifdef _KERNEL
#include <sys/sysmacros.h>
/* the alignment for block stores to save fp registers */
#define FPR_ALIGN (64)
extern void big_savefp(kfpu_t *);
extern void big_restorefp(kfpu_t *);
#endif /* _KERNEL */
{
#ifdef _KERNEL
#ifdef DEBUG
if (!fpu_exists)
return (BIG_GENERAL_ERR);
#endif
return (rv);
#else
#endif /* _KERNEL */
}
#else /* ! USE_FLOATING_POINT */
/*
* This version uses strictly integer math and is safe in the kernel
* for all platforms.
*/
/*
* computes a^e mod n
* assumes a < n, n odd, result->value at least as long as n->value
*/
{
int i, j, k, l, m, p,
int nbits;
nbits = big_numbits(e);
if (nbits < 50) {
groupbits = 1;
apowerssize = 1;
} else {
}
return (err);
goto ret1;
goto ret2;
goto ret3;
}
for (i = 0; i < apowerssize; i++) {
goto ret;
}
if (big_cmp_abs(a, n) > 0) {
goto ret;
} else {
}
goto ret;
for (i = 1; i < apowerssize; i++) {
goto ret;
}
goto ret;
k = 0;
l = 0;
p = 0;
bitcount = 0;
for (i = nbits / 32; i >= 0; i--) {
for (j = bitind - 1; j >= 0; j--) {
goto ret;
} else {
bitcount++;
p = p * 2 + bit;
if (bit == 1) {
k = k + l + 1;
l = 0;
} else {
l++;
}
for (m = 0; m < k; m++) {
goto ret;
}
&(apowers[p >> (l + 1)]),
goto ret;
for (m = 0; m < l; m++) {
goto ret;
}
k = 0;
l = 0;
p = 0;
bitcount = 0;
}
}
}
bitind = 32;
}
for (m = 0; m < k; m++) {
goto ret;
}
if (p != 0) {
goto ret;
}
for (m = 0; m < l; m++) {
goto ret;
}
goto ret;
ret:
for (i = apowerssize - 1; i >= 0; i--) {
big_finish(&(apowers[i]));
}
ret3:
ret2:
ret1:
return (err);
}
#endif /* USE_FLOATING_POINT */
{
return (err);
goto ret1;
goto ret2;
/*
* check whether a is too short - to avoid timing attacks
*/
alen--;
}
/*
* a is too short, add p*q to it before
* taking it modulo p and q
* this will also affect timing, but this difference
* does not depend on p or q, only on a
* (in "normal" operation, this path will never be
* taken, so it is not a performance penalty
*/
goto ret;
goto ret;
goto ret;
goto ret;
} else {
goto ret;
goto ret;
}
goto ret;
goto ret;
goto ret;
goto ret;
goto ret;
}
goto ret;
ret:
big_finish(&tmp);
ret2:
big_finish(&aq);
ret1:
big_finish(&ap);
return (err);
}
{
nbits = big_numbits(n);
return (err);
goto ret1;
goto ret2;
goto ret3;
}
if (highbits == 32) {
} else {
}
goto ret;
if (diff <= 0) {
goto ret;
}
goto ret;
if (diff > 0) {
t = high;
mid = t;
} else if (diff < 0) {
t = low;
mid = t;
} else {
goto ret;
}
}
ret:
ret3:
ret2:
ret1:
return (err);
}
{
if (big_is_zero(nn) ||
*jac = 0;
return (BIG_OK);
}
return (err);
goto ret1;
goto ret2;
n = &t1;
m = &t2;
*jac = 1;
while (big_cmp_abs(&One, m) != 0) {
if (big_is_zero(n)) {
*jac = 0;
goto ret;
}
if ((m->value[0] & 1) == 0) {
(void) big_half_pos(m, m);
} else if ((n->value[0] & 1) == 0) {
(void) big_half_pos(n, n);
} else {
}
goto ret;
t = tmp2;
tmp2 = m;
m = n;
n = t;
}
}
ret:
ret2:
ret1:
return (err);
}
{
int m, w, i;
if (big_cmp_abs(k, &One) == 0) {
return (BIG_OK);
}
return (err);
goto ret1;
goto ret2;
m = big_numbits(k);
w = (m - 1) / 32;
if (big_cmp_abs(k, &ki) != 0)
for (i = 0; i < m; i++) {
goto ret;
goto ret;
goto ret;
goto ret;
} else {
goto ret;
goto ret;
}
if (bit == 0) {
bit = 0x80000000;
w--;
}
}
ret:
ret2:
ret1:
return (err);
}
{
int e, i, jac;
if (big_cmp_abs(n, &One) == 0)
return (BIG_FALSE);
if (big_cmp_abs(n, &Two) == 0)
return (BIG_TRUE);
if ((n->value[0] & 1) == 0)
return (BIG_FALSE);
return (err);
goto ret1;
goto ret2;
goto ret3;
goto ret4;
e = 0;
while ((o.value[0] & 1) == 0) {
e++;
(void) big_half_pos(&o, &o); /* cannot fail */
}
goto ret;
i = 0;
while ((i < e) &&
goto ret;
i++;
}
goto ret;
}
goto ret;
goto ret;
if (big_cmp_abs(&tmp, n) == 0) {
goto ret;
}
do {
(void) big_add_abs(&o, &o, &One);
goto ret;
goto ret;
} while (jac != -1);
goto ret;
ret:
ret4:
ret3:
ret2:
ret1:
if (o.malloced) big_finish(&o);
return (err);
}
#define SIEVESIZE 1000
/*
 * Small primes used for trial division when searching for primes.
 * Bug fix: the table previously contained 51 (= 3 * 17) and 91
 * (= 7 * 13), which are composite; every entry must be prime for
 * the sieve logic to be meaningful.
 */
uint32_t smallprimes[] =
{
3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
53, 59, 61, 67, 71, 73, 79, 83, 89, 97
};
{
int i;
return (err);
/* CONSTCOND */
while (1) {
for (i = 0;
i < sizeof (smallprimes) / sizeof (uint32_t); i++) {
p = smallprimes[i];
}
}
for (i = 0; i < SIEVESIZE; i++) {
if (sieve[i] == 0) {
return (err);
else
return (BIG_OK);
}
}
return (err);
}
}
/* NOTREACHED */
}
{
return (err);
return (err);
return (err);
}
return (BIG_OK);
}
/*
* given m and e, computes the rest in the equation
* gcd(m, e) = cm * m + ce * e
*/
{
int len;
return (err);
goto ret1;
goto ret2;
goto ret3;
goto ret4;
goto ret5;
goto ret6;
goto ret7;
goto ret8;
while (!big_is_zero(ri)) {
t = riminus2;
ri = t;
goto ret;
goto ret;
t = vmiminus1;
vmi = t;
goto ret;
goto ret;
t = veiminus1;
vei = t;
goto ret;
}
goto ret;
goto ret;
ret:
ret8:
ret7:
ret6:
ret5:
ret4:
ret3:
ret2:
ret1:
return (err);
}