mont_mulf_kernel_v9.s revision 8de5c4f463386063e184a851437d58080c6c626c
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is mostly a result of compiling the mont_mulf.c file to generate an
* assembly output and then hand-editing that output to replace the
* compiler-generated loop for the 512-bit case (nlen == 16) in the
* mont_mulf_noconv routine with a hand-crafted version. This file also
* has big_savefp() and big_restorefp() routines added by hand.
*/
#include <sys/asm_linkage.h>
#include <sys/privregs.h>
#include <sys/machthread.h>
#include <sys/machtrap.h>
/*
 * Lint-only stand-in for double2uint64_t(): it ignores its argument and
 * always returns 0. The return type is spelled out as uint64_t so that the
 * stub agrees with the prototype shown in the mont_mulf.c listing further
 * down, instead of falling back to an implicit int return type.
 */
/* ARGSUSED */
uint64_t
double2uint64_t(double* d)
{
	return (0ULL);
}
/*
 * NOTE(review): each of the four definitions below consists only of a
 * `void` return type and an empty body; the function name and parameter
 * list are absent, so none of them is a valid C definition as written.
 * The declarators need to be restored from the original source before
 * this section can be processed by lint.
 *
 * NOTE(review): the `#else` and `#endif` lines annotated `lint || __lint`
 * that follow this section have no visible opening `#if`; confirm that the
 * opening conditional is restored ahead of these stubs.
 */
/* ARGSUSED */
void
{
}
/* ARGSUSED */
void
{
}
/* ARGSUSED */
void
{
}
/* ARGSUSED */
void
{
}
#else /* lint || __lint */
.file "mont_mulf.c"
!
! Floating-point constant table, 8-byte aligned. Each entry is one
! (high word, low word) pair; the value noted above each entry is what the
! pair decodes to when read as a big-endian IEEE-754 double.
! NOTE(review): only the Zero label is present here; the other entries have
! no symbolic name in this listing.
!
.align 8
! 0x40f00000:00000000 = 65536.0 (2^16)
.word 0x40f00000, 0x00000000
! 0x3ef00000:00000000 = 2^-16
.word 0x3ef00000, 0x00000000
! all-zero bit pattern = 0.0
Zero:
.word 0x00000000, 0x00000000
! 0x41f00000:00000000 = 4294967296.0 (2^32)
.word 0x41f00000, 0x00000000
! 0x3df00000:00000000 = 2^-32
.word 0x3df00000, 0x00000000
! 1 !/*
! 2 ! * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
! 3 ! * Use is subject to license terms.
! 4 ! */
! 6 !#pragma ident "@(#)mont_mulf.c 1.2 01/09/24 SMI"
! 9 !/*
! 10 ! * If compiled without -DRF_INLINE_MACROS then needs -lm at link time
! 11 ! * If compiled with -DRF_INLINE_MACROS then needs conv.il at compile time
! 12 ! * (i.e. cc <compiler_flags> -DRF_INLINE_MACROS conv.il mont_mulf.c )
! 13 ! */
! 16 !#include <math.h>
! 24 !#ifdef RF_INLINE_MACROS
! 26 !double upper32(double);
! 27 !double lower32(double, double);
! 28 !double mod(double, double, double);
! 30 !#else
! 32 !static double
! 33 !upper32(double x)
! 34 !{
! 36 !}
! 39 !/* ARGSUSED */
! 40 !static double
! 41 !lower32(double x, double y)
! 42 !{
! 44 !}
! 46 !static double
! 48 !{
! 50 !}
! 52 !#endif
! 55 !static void
! 57 !{
!
!
! 58 ! int i;
! 64 ! x = dt[i];
/* 0x0090 */ nop
! 70 ! }
! 71 !}
! 75 !#ifdef _KERNEL
! 76 !/*
! 77 ! * This only works if 0 <= d < 2^53
! 78 ! */
! 79 !uint64_t
! 80 !double2uint64_t(double* d)
! 81 !{
! 82 ! uint64_t x;
! 86 ! x = *((uint64_t *)d);
!
!
! 87 ! if (x == 0) {
! 88 ! return (0ULL);
! 89 ! }
! 94 ! return (x);
! 95 !}
! 96 !#else
! 97 !/*
! 98 ! * This only works if 0 <= d < 2^63
! 99 ! */
! 100 !uint64_t
! 101 !double2uint64_t(double* d)
! 102 !{
! 103 ! return ((int64_t)(*d));
! 104 !}
! 105 !#endif
! 107 !/* ARGSUSED */
! 108 !void
! 110 !{
!
!
! 111 ! int i;
! 113 ! a, b, c, d; /* because more efficient code is */
! 114 ! /* generated this way, and there */
! 115 ! /* is no overflow */
! 116 ! t1 = 0;
! 122 ! t = (a >> 32);
! 127 ! t1 = t;
! 128 ! a = c;
! 129 ! b = d;
! 130 ! }
! 132 ! t = (a >> 32);
!
!
! 135 !}
! 138 !void
! 140 !{
!
!
! 141 ! int i;
! 143 !#pragma pipeloop(0)
! 144 ! for (i = 0; i < len; i++)
/* 0x0168 */ nop
!
!
! 146 !}
! 149 !void
! 151 !{
!
!
! 152 ! int i;
! 153 ! uint32_t a;
! 155 !#pragma pipeloop(0)
! 156 ! for (i = 0; i < len; i++) {
! 157 ! a = i32[i];
!
!
! 160 ! }
! 161 !}
! 163 !#ifdef RF_INLINE_MACROS
! 165 !void
! 167 ! const double *, /* 2^16 */
! 168 ! const double *, /* 0 */
! 169 ! double *, /* result16 */
! 170 ! double *, /* result32 */
! 171 ! float *); /* source - should be unsigned int* */
! 172 ! /* converted to float* */
! 174 !#else
! 177 !/* ARGSUSED */
! 178 !static void
! 182 ! double *result16,
! 183 ! double *result32,
! 185 ! /* converted to float* */
! 186 !{
! 188 ! uint32_t a, b, c, d;
! 191 ! a = i32[0];
! 197 ! result32[0] = (double)a;
! 207 !}
! 209 !#endif
! 212 !void
! 214 !{
!
!
! 215 ! int i;
! 216 ! uint32_t a;
! 218 !#pragma pipeloop(0)
! 222 ! (float *)(&(i32[i])));
! 223 ! }
! 224 ! for (; i < len; i++) {
! 225 ! a = i32[i];
! 229 ! }
! 230 !}
! 234 !static void
! 236 !{
!
!
! 238 ! int i;
! 241 ! i = -1;
! 242 ! } else {
/* 0x0088 */ nop
! 245 ! }
! 246 ! }
! 248 ! acc = 0;
! 249 ! for (i = 0; i < len; i++) {
/* 0x0224 */ nop
! 253 ! }
! 254 ! }
! 255 !}
! 257 !/*************
! 258 !static void
! 259 !adjust_montf_result_bad(uint32_t *i32, uint32_t *nint, int len)
! 260 !{
! 261 ! int64_t acc;
! 262 ! int i;
! 264 ! c4++;
! 265 !
! 266 ! if (i32[len] > 0) {
! 267 ! i = -1;
! 268 ! c1++;
! 269 ! } else {
! 270 ! for (i = len - 1; i >= 0; i++) {
! 271 ! if (i32[i] != nint[i]) break;
! 272 ! c2++;
! 273 ! }
! 274 ! }
! 275 ! if ((i < 0) || (i32[i] > nint[i])) {
! 276 ! c3++;
! 277 ! acc = 0;
! 278 ! for (i = 0; i < len; i++) {
! 279 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
! 280 ! i32[i] = acc & 0xffffffff;
! 281 ! acc = acc >> 32;
! 282 ! }
! 283 ! }
! 284 !}
! 285 !uint32_t saveresult[1000];
! 286 !void printarray(char *name, uint32_t *arr, int len)
! 287 !{
! 288 ! int i, j;
! 289 ! uint64_t tmp;
! 291 ! printf("uint64_t %s[%d] =\n{\n",name,(len+1)/2);
! 292 ! for(i=j=0; i<len; i+=2,j+=2){
! 293 ! if(j == 6){
! 294 ! printf("\n");
! 295 ! j=0;
! 296 ! }
! 297 ! tmp = (((uint64_t)arr[i])<<32) | ((uint64_t)arr[i+1]);
! 298 ! printf("0x%016llx",tmp);
! 299 ! if((i/2)!=(((len+1)/2)-1))printf(",");
! 300 ! if(j!=4)printf(" ");
! 301 ! }
! 302 ! if(j!=0) printf("\n");
! 303 ! printf("};\n");
! 304 !}
! 305 !**************/
! 308 !/*
! 309 ! * the lengths of the input arrays should be at least the following:
! 310 ! * result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
! 311 ! * all of them should be different from one another
! 312 ! */
! 317 !{
!
!
! 318 ! int i, j, jj;
! 340 !#pragma pipeloop(0)
! 343 ! }
! 346 ! jj = 0;
! 347 ! }
! 351 ! }
! 352 ! } else {
!1
!2
!3
!4
!5
!6
!7
!8
!9
!10
!11
!12
!13
!14
!15
!16
!17
!18
!19
!20
!21
!22
!23
!24
!25
!26
!27
!28
!29
!30
!31
!32
!33
!34
!35
!36
!37
!38
!39
!40
!41
!42
!43
!44
!45
!46
!47
!48
!49
!50
!51
!52
!53
!54
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!1
!2
!3
!4
!5
!6
!7
!8
!9
!10
!11
!12
!13
!14
!15
!16
!17
!18
!19
!20
!21
!22
!23
!24
!25
!26
!27
!28
!29
!30
!31
!32
!33
!34
!35
!36
!37
!38
!39
!40
!41
!42
!43
!44
!45
!46
!47
!48
!49
!50
!51
!52
!53
!54
!55
! 406 ! }
! 407 ! }
! 411 !/*for(i=0;i<nlen+1;i++) saveresult[i]=result[i];*/
/* 0x046c */ nop
/* 0x0aa0 */ nop
!
! Compiler-emitted debugging records tagged ".stab.index".
! The first string records the compiler identification (Sun C 5.5 with its
! patch level and date). The second records the working directory and the
! full cc command line used to turn mont_mulf.c into this assembly listing.
! Per the header comment of this file, the listing was hand-edited after it
! was generated, so these strings describe the original compiler run only.
!
.xstabs ".stab.index","V=10.0;DBG_GEN=4.14.14;cd;backend;Xa;O;R=Sun C 5.5 Patch 112760-07 2004/02/03",60,0,0,0
.xstabs ".stab.index","/workspace/ferenc/algorithms/bignum/unified/mont_mulf; /ws/onnv-tools/SUNWspro/SOS8/prod/bin/cc -D_KERNEL -DRF_INLINE_MACROS -fast -xarch=v9 -xO5 -xstrconst -xdepend -Xa -xchip=ultra3 -xcode=abs32 -Wc,-Qrm-Qd -Wc,-Qrm-Qf -Wc,-assembly -V -W0,-xp -c conv_v9.il -o mont_mulf.o mont_mulf.c",52,0,0,0
/*
 * NOTE(review): no replacement text follows the trailing backslash on the
 * FZERO definition, so the line continuation splices the next line into
 * the macro. As written, FZERO is defined as the text `#include "assym.h"`
 * and assym.h is never actually included. The macro body needs to be
 * restored from the original source, ending on a line without a trailing
 * backslash, so that the #include becomes a directive of its own again.
 */
#define FZERO \
#include "assym.h"
/*
* we don't want to take a fp_disabled trap. We need not
*/
#endif /* lint || __lint */