lbnppc.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* Copyright (c) 1999 by Sun Microsystems, Inc.
* All rights reserved.
*/
/*
* Cylink Corporation (c) 1998
*
* This software is licensed by Cylink to the Internet Software Consortium to
* promote implementation of royalty free public key cryptography within IETF
* standards. Cylink wishes to expressly thank the contributions of Dr.
* Martin Hellman, Whitfield Diffie, Ralph Merkle and Stanford University for
* their contributions to Internet Security. In accordance with the terms of
* this license, ISC is authorized to distribute and sublicense this software
* for the practice of IETF standards.
*
* The software includes BigNum, written by Colin Plumb and licensed by Philip
* R. Zimmermann for royalty free use and distribution with Cylink's
* software. Use of BigNum as a stand alone product or component is
* specifically prohibited.
*
* Disclaimer of All Warranties. THIS SOFTWARE IS BEING PROVIDED "AS IS",
* WITHOUT ANY EXPRESSED OR IMPLIED WARRANTY OF ANY KIND WHATSOEVER. IN
* PARTICULAR, WITHOUT LIMITATION ON THE GENERALITY OF THE FOREGOING, CYLINK
* MAKES NO REPRESENTATION OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
* PURPOSE.
*
* Cylink or its representatives shall not be liable for tort, indirect,
* special or consequential damages such as loss of profits or loss of
* goodwill from the use or inability to use the software for any purpose or
* for any reason whatsoever.
*
* EXPORT LAW: Export of the Foundations Suite may be subject to compliance
* with the rules and regulations promulgated from time to time by the Bureau
* of Export Administration, United States Department of Commerce, which
* restrict the export and re-export of certain products and technical data.
* If the export of the Foundations Suite is controlled under such rules and
* regulations, then the Foundations Suite shall not be exported or
* re-exported, directly or indirectly, (a) without all export or re-export
* licenses and governmental approvals required by any applicable laws, or (b)
* in violation of any applicable prohibition against the export or re-export
* of any part of the Foundations Suite. All export licenses for software
* containing the Foundations Suite are the sole responsibility of the licensee.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include "lbnppc.h"
/*
* lbnppc.c - Assembly primitives for the bignum library, PowerPC version.
*
* Copyright (c) 1995 Colin Plumb. All rights reserved.
* For licensing and other legal details, see the file legal.c
*
* Register usage during function calls is:
* r0 - volatile
* r1 - stack pointer, preserved
* r2 - TOC pointer, preserved
* r3 - First argument and return value register
* r4-r10 - More argument registers, volatile
* r11-r12 - Volatile
* r13-r31 - Preserved
* LR, CTR, XER and MQ are all volatile.
* LR holds return address on entry.
*
* On the PPC 601, unrolling the loops more doesn't seem to speed things
* up at all. I'd be curious if other chips differed.
*/
#if __MWERKS__ < 0x800
#include "ppcasm.h" /* PowerPC assembler */
/*
* mulN1 - array of unsigned words assembled with the PPC_* macros
* (presumably provided by ppcasm.h, the "PowerPC assembler" header).
*
* MulN1 expects (*out, *in, len, k), count (presumably len) >= 1.
* Arguments arrive in r3 (out), r4 (in), r5 (len) and r6 (k).
*
* NOTE(review): in this copy only the carry-clearing ADDIC and the final
* BLR are present; no instructions appear between the Loop and Label
* markers. Confirm against the upstream bignum sources before relying
* on this table.
*/
static const unsigned mulN1[] = {
PPC_ADDIC(0,0,0), /* Clear carry bit for loop */
/* Loop: (start of the per-word loop; no loop instructions are present here) */
/* Label: (loop exit) */
PPC_BLR() /* presumably encodes blr: return through LR */
};
/*
* mulAdd1 - array of unsigned words assembled with the PPC_* macros
* (presumably provided by ppcasm.h, the "PowerPC assembler" header).
*
* MulAdd1 expects (*out, *in, len, k), count (presumably len) >= 1.
* Arguments arrive in r3 (out), r4 (in), r5 (len) and r6 (k).
*
* NOTE(review): in this copy only the final BLR is present; no
* instructions appear between the Loop and Label markers. Confirm
* against the upstream bignum sources before relying on this table.
*/
static unsigned const mulAdd1[] = {
/* Loop: (start of the per-word loop; no loop instructions are present here) */
/* Label: (loop exit) */
PPC_BLR() /* presumably encodes blr: return through LR */
};
/*
* mulSub1 - array of unsigned words assembled with the PPC_* macros
* (presumably provided by ppcasm.h) for the multiply-and-subtract
* primitive.
*
* MulSub1 expects (*out, *in, len, k), count (presumably len) >= 1.
* Arguments arrive in r3 (out), r4 (in), r5 (len) and r6 (k).
*
* Multiply and subtract is rather a pain. If the subtract of the
* low word of the product from out[i] generates a borrow, we want to
* increment the carry word (initially in the range 0..0xfffffffe).
* However, the PPC's carry bit CF is *clear* after a subtract that
* borrows, so we want to add (1-CF) to the carry word. This is done
* using two instructions:
*
* SUBFME, subtract from minus one extended. This computes
* rD = ~rS + 0xffffffff + CF. Since rS is from 0 to 0xfffffffe,
* ~rS is from 1 through 0xffffffff, and the sum with 0xffffffff+CF is
* from 0 through 0xffffffff, setting the carry flag unconditionally, and
* NOR, which is used as a bitwise invert NOT instruction.
*
* The SUBFME performs the computation rD = ~rS + 0xffffffff + CF,
* = (-rS - 1) + (CF - 1) = -(rS - CF + 1) - 1 = ~(rS + 1-CF),
* which is the bitwise complement of the value we want.
* We want to add the complement of that result to the low word of the
* product, which is just what a subtract would do, if only we could get
* the carry flag clear. But it's always set, except for SUBFE, and the
* operation we just performed unconditionally *sets* the carry flag. Ugh.
* So find the complement in a separate instruction.
*
* NOTE(review): in this copy only the final BLR is present; the
* SUBFME/NOR sequence described above does not appear between the Loop
* and Label markers. Confirm against the upstream bignum sources.
*/
static unsigned const mulSub1[] = {
/* Loop: (start of the per-word loop; no loop instructions are present here) */
/* Label: (loop exit) */
PPC_BLR() /* presumably encodes blr: return through LR */
};
#if 0
/*
* montReduce - instruction table that is compiled out by the enclosing
* "#if 0".
*
* Args: (BNWORD32 *n, BNWORD32 const *mod, unsigned mlen, BNWORD32 inv)
* passed in r3, r4, r5 and r6 respectively.
* r7, r8 and r9 are the triple-width accumulator.
* r0 and r10 are temporary registers.
* r11 and r12 are temporary pointers into n and mod, respectively.
* r2 (!) is another temporary register; note that the calling convention
* described at the top of this file lists r2 as the preserved TOC pointer.
*
* NOTE(review): the table looks unfinished - two entries use "x" as a
* register operand, which is not defined anywhere in view, and there is
* no terminating BLR. Presumably this is why it is disabled; confirm
* before enabling.
*/
static unsigned const montReduce[] = {
/* Loop: */
PPC_MULHWU(0,0,10),
PPC_LWZU(0,x,4), /* "x": base register left unspecified */
PPC_ADDC(0,7,0),
PPC_STW(0,x,0), /* "x": base register left unspecified */
PPC_LI(9,0),
};
#endif
/*
* Three overlapped transition vectors for three functions.
* A PowerPC transition vector for a (potentially) inter-module
* jump or call consists of two words, an instruction address
* and a Table Of Contents (TOC) pointer, which is loaded into
* r2 (the TOC pointer register listed in the register usage notes at
* the top of this file). Since none of the routines here have global
* variables, they don't need a TOC pointer, so the value is unimportant.
* This array places an uninteresting 32-bit value after each address.
*
* NOTE(review): in this copy the initializer holds only a single 0
* entry, not the three address/value pairs described above. Confirm
* against the upstream bignum sources.
*/
unsigned const * const lbnPPC_tv[] = {
0
};
#else /* __MWERKS__ >= 0x800 */
/*
* MulN1 expects (*out, *in, len, k), count (presumably len) >= 1.
* Arguments arrive in r3 (out), r4 (in), r5 (len) and r6 (k).
*
* Written as a Metrowerks "asm" function and declared to return void.
*
* NOTE(review): the declarator is incomplete in this copy - the line
* carrying the function name and the leading parameters is missing
* between "asm void" and the "register unsigned len, ..." continuation,
* and the body contains only the "loop:" label. Restore both from the
* upstream bignum sources; as written this will not compile.
*/
asm void
register unsigned len, register unsigned k)
{
loop:
}
/*
* MulAdd1 expects (*out, *in, len, k), count (presumably len) >= 1.
* Arguments arrive in r3 (out), r4 (in), r5 (len) and r6 (k).
*
* Written as a Metrowerks "asm" function and declared to return unsigned.
*
* NOTE(review): the declarator is incomplete in this copy - the line
* carrying the function name and the leading parameters is missing
* between "asm unsigned" and the "register unsigned len, ..."
* continuation, and the body contains only the "loop:" label. Restore
* both from the upstream bignum sources; as written this will not compile.
*/
asm unsigned
register unsigned len, register unsigned k)
{
loop:
}
/*
* MulSub1 expects (*out, *in, len, k), count (presumably len) >= 1.
* Arguments arrive in r3 (out), r4 (in), r5 (len) and r6 (k).
*
* Written as a Metrowerks "asm" function and declared to return unsigned.
*
* Multiply and subtract is rather a pain. If the subtract of the
* low word of the product from out[i] generates a borrow, we want to
* increment the carry word (initially in the range 0..0xfffffffe).
* However, the PPC's carry bit CF is *clear* after a subtract that
* borrows, so we want to add (1-CF) to the carry word. This is done
* using two instructions:
*
* SUBFME, subtract from minus one extended. This computes
* rD = ~rS + 0xffffffff + CF. Since rS is from 0 to 0xfffffffe,
* ~rS is from 1 through 0xffffffff, and the sum with 0xffffffff+CF is
* from 0 through 0xffffffff, setting the carry flag unconditionally, and
* NOR, which is used as a bitwise invert NOT instruction.
*
* The SUBFME performs the computation rD = ~rS + 0xffffffff + CF,
* = (-rS - 1) + (CF - 1) = -(rS - CF + 1) - 1 = ~(rS + 1-CF),
* which is the bitwise complement of the value we want.
* We want to add the complement of that result to the low word of the
* product, which is just what a subtract would do, if only we could get
* the carry flag clear. But it's always set, except for SUBFE, and the
* operation we just performed unconditionally *sets* the carry flag. Ugh.
* So find the complement in a separate instruction.
*
* NOTE(review): the declarator is incomplete in this copy - the line
* carrying the function name and the leading parameters is missing
* between "asm unsigned" and the "register unsigned len, ..."
* continuation, and the body contains only the "loop:" label. Restore
* both from the upstream bignum sources; as written this will not compile.
*/
asm unsigned
register unsigned len, register unsigned k)
{
loop:
}
#endif /* __MWERKS__ >= 0x800 */
/* 45678901234567890123456789012345678901234567890123456789012345678901234567 */