# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# October 2012.
#
# SPARCv9 VIS3 Montgomery multiplicaion procedure suitable for T3 and
# onward. There are three new instructions used here: umulxhi,
# addxc[cc] and initializing store. On T3 RSA private key operations
# are 1.54/1.87/2.11/2.26 times faster for 512/1024/2048/4096-bit key
# lengths. This is without dedicated squaring procedure. On T4
# corresponding coefficients are 1.47/2.10/2.80/2.90x, which is mostly
# for reference purposes, because T4 has dedicated Montgomery
# multiplication and squaring *instructions* that deliver even more.
$bits=32;
#include <openssl/fipssyms.h>
(map("%g$_",(1..5)),map("%o$_",(0..5,7)));
# int bn_mul_mont(
# and >=6
.align 32
# +-------------------------------+<----- %sp
# . .
# +-------------------------------+<----- aligned at 64 bytes
# | __int64 tmp[0] |
# +-------------------------------+
# . .
# . .
# +-------------------------------+<----- aligned at 64 bytes
# | __int64 ap[1..0] | converted ap[]
# +-------------------------------+
# | __int64 np[1..0] | converted np[]
# +-------------------------------+
# | __int64 ap[3..2] |
# . .
# . .
# +-------------------------------+
.align 16
.L1st:
!.L1st
.align 16
.Louter:
.align 16
.Linner:
!.Linner
sub $i, 8, $i
.align 16
.Lsub:
.align 16
.asciz "Montgomery Multiplication for SPARCv9 VIS3, CRYPTOGAMS by <appro\@openssl.org>"
.align 4
# Purpose of these subroutines is to explicitly encode VIS instructions,
# so that one can compile the module without having to specify VIS
# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
# Idea is to reserve for option to produce "universal" binary and let
# programmer detect if current CPU is VIS capable at run-time.
sub unvis3 {
"addxccc" => 0x013,
"umulxhi" => 0x016 );
$ref = "$mnemonic\t$rs1,$rs2,$rd";
}
return sprintf ".word\t0x%08x !%s",
$ref;
} else {
return $ref;
}
}
foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/ge;
/ge;
print $_,"\n";
}
close STDOUT;