mont_mulf_kernel_v9.s revision 8de5c4f463386063e184a851437d58080c6c626c
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is mostly a result of compiling the mont_mulf.c file to generate an
* assembly output and then hand-editing that output to replace the
* compiler-generated loop for the 512-bit case (nlen == 16) in the
* mont_mulf_noconv routine with a hand-crafted version. This file also
* has big_savefp() and big_restorefp() routines added by hand.
*/
#include <sys/asm_linkage.h>
#include <sys/trap.h>
#include <sys/stack.h>
#include <sys/privregs.h>
#include <sys/regset.h>
#include <sys/vis.h>
#include <sys/machthread.h>
#include <sys/machtrap.h>
#include <sys/machsig.h>
#if defined(lint) || defined(__lint)
#include <sys/types.h>
/* ARGSUSED */
uint64_t
double2uint64_t(double* d)
{
return (0ULL);
}
/* ARGSUSED */
void
conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
{
}
/* ARGSUSED */
void
conv_i32_to_d32(double *d32, uint32_t *i32, int len)
{
}
/* ARGSUSED */
void
conv_i32_to_d16(double *d16, uint32_t *i32, int len)
{
}
/* ARGSUSED */
void
mont_mulf_noconv(uint32_t *result, double *dm1, double *dm2, double *dt,
double *dn, uint32_t *nint, int nlen, double dn0)
{
}
#else /* lint || __lint */
.section ".text",#alloc,#execinstr
.file "mont_mulf.c"
.section ".bss",#alloc,#write
Bbss.bss:
.section ".data",#alloc,#write
Ddata.data:
.section ".rodata",#alloc
!
! CONSTANT POOL
!
Drodata.rodata:
.global TwoTo16
.align 8
!
! CONSTANT POOL
!
.global TwoTo16
TwoTo16:
.word 1089470464
.word 0
.type TwoTo16,#object
.size TwoTo16,8
.global TwoToMinus16
!
! CONSTANT POOL
!
.global TwoToMinus16
TwoToMinus16:
.word 1055916032
.word 0
.type TwoToMinus16,#object
.size TwoToMinus16,8
.global Zero
!
! CONSTANT POOL
!
.global Zero
Zero:
.word 0
.word 0
.type Zero,#object
.size Zero,8
.global TwoTo32
!
! CONSTANT POOL
!
.global TwoTo32
TwoTo32:
.word 1106247680
.word 0
.type TwoTo32,#object
.size TwoTo32,8
.global TwoToMinus32
!
! CONSTANT POOL
!
.global TwoToMinus32
TwoToMinus32:
.word 1039138816
.word 0
.type TwoToMinus32,#object
.size TwoToMinus32,8
.section ".text",#alloc,#execinstr
/* 000000 0 */ .register %g3,#scratch
/* 000000 */ .register %g2,#scratch
/* 000000 0 */ .align 32
! FILE mont_mulf.c
! 1 !/*
! 2 ! * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
! 3 ! * Use is subject to license terms.
! 4 ! */
! 6 !#pragma ident "@(#)mont_mulf.c 1.2 01/09/24 SMI"
! 9 !/*
! 10 ! * If compiled without -DRF_INLINE_MACROS then needs -lm at link time
! 11 ! * If compiled with -DRF_INLINE_MACROS then needs conv.il at compile time
! 12 ! * (i.e. cc <compileer_flags> -DRF_INLINE_MACROS conv.il mont_mulf.c )
! 13 ! */
! 15 !#include <sys/types.h>
! 16 !#include <math.h>
! 18 !static const double TwoTo16 = 65536.0;
! 19 !static const double TwoToMinus16 = 1.0/65536.0;
! 20 !static const double Zero = 0.0;
! 21 !static const double TwoTo32 = 65536.0 * 65536.0;
! 22 !static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);
! 24 !#ifdef RF_INLINE_MACROS
! 26 !double upper32(double);
! 27 !double lower32(double, double);
! 28 !double mod(double, double, double);
! 30 !#else
! 32 !static double
! 33 !upper32(double x)
! 34 !{
! 35 ! return (floor(x * TwoToMinus32));
! 36 !}
! 39 !/* ARGSUSED */
! 40 !static double
! 41 !lower32(double x, double y)
! 42 !{
! 43 ! return (x - TwoTo32 * floor(x * TwoToMinus32));
! 44 !}
! 46 !static double
! 47 !mod(double x, double oneoverm, double m)
! 48 !{
! 49 ! return (x - m * floor(x * oneoverm));
! 50 !}
! 52 !#endif
! 55 !static void
! 56 !cleanup(double *dt, int from, int tlen)
! 57 !{
!
! SUBROUTINE cleanup
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
cleanup:
/* 000000 57 */ sra %o1,0,%o4
/* 0x0004 */ sra %o2,0,%o5
! 58 ! int i;
! 59 ! double tmp, tmp1, x, x1;
! 61 ! tmp = tmp1 = Zero;
/* 0x0008 61 */ sll %o5,1,%g5
! 63 ! for (i = 2 * from; i < 2 * tlen; i += 2) {
/* 0x000c 63 */ sll %o4,1,%g3
/* 0x0010 */ cmp %g3,%g5
/* 0x0014 */ bge,pn %icc,.L77000188
/* 0x0018 0 */ sethi %hi(Zero),%o3
.L77000197:
/* 0x001c 63 */ ldd [%o3+%lo(Zero)],%f8
/* 0x0020 */ sra %g3,0,%o1
/* 0x0024 */ sub %g5,1,%g2
/* 0x0028 */ sllx %o1,3,%g4
! 64 ! x = dt[i];
/* 0x002c 64 */ ldd [%g4+%o0],%f10
/* 0x0030 63 */ add %g4,%o0,%g1
/* 0x0034 */ fmovd %f8,%f18
/* 0x0038 */ fmovd %f8,%f16
! 65 ! x1 = dt[i + 1];
! 66 ! dt[i] = lower32(x, Zero) + tmp;
.L900000110:
/* 0x003c 66 */ fdtox %f10,%f0
/* 0x0040 65 */ ldd [%g1+8],%f12
! 67 ! dt[i + 1] = lower32(x1, Zero) + tmp1;
! 68 ! tmp = upper32(x);
! 69 ! tmp1 = upper32(x1);
/* 0x0044 69 */ add %g3,2,%g3
/* 0x0048 */ cmp %g3,%g2
/* 0x004c 67 */ fdtox %f12,%f2
/* 0x0050 68 */ fmovd %f0,%f4
/* 0x0054 66 */ fmovs %f8,%f0
/* 0x0058 67 */ fmovs %f8,%f2
/* 0x005c 66 */ fxtod %f0,%f0
/* 0x0060 67 */ fxtod %f2,%f2
/* 0x0064 69 */ fdtox %f12,%f6
/* 0x0068 66 */ faddd %f0,%f18,%f10
/* 0x006c */ std %f10,[%g1]
/* 0x0070 67 */ faddd %f2,%f16,%f14
/* 0x0074 */ std %f14,[%g1+8]
/* 0x0078 68 */ fitod %f4,%f18
/* 0x007c 69 */ add %g1,16,%g1
/* 0x0080 */ fitod %f6,%f16
/* 0x0084 */ ble,a,pt %icc,.L900000110
/* 0x0088 64 */ ldd [%g1],%f10
.L77000188:
/* 0x008c 69 */ retl ! Result =
/* 0x0090 */ nop
/* 0x0094 0 */ .type cleanup,2
/* 0x0094 0 */ .size cleanup,(.-cleanup)
.section ".text",#alloc,#execinstr
/* 000000 0 */ .align 8
/* 000000 */ .skip 24
/* 0x0018 */ .align 32
! 70 ! }
! 71 !}
! 75 !#ifdef _KERNEL
! 76 !/*
! 77 ! * This only works if 0 <= d < 2^53
! 78 ! */
! 79 !uint64_t
! 80 !double2uint64_t(double* d)
! 81 !{
! 82 ! uint64_t x;
! 83 ! uint64_t exp;
! 84 ! uint64_t man;
! 86 ! x = *((uint64_t *)d);
!
! SUBROUTINE double2uint64_t
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
.global double2uint64_t
double2uint64_t:
/* 000000 86 */ ldx [%o0],%o2
! 87 ! if (x == 0) {
/* 0x0004 87 */ cmp %o2,0
/* 0x0008 */ bne,pn %xcc,.L900000206
/* 0x000c 94 */ sethi %hi(0xfff00000),%o5
.L77000202:
/* 0x0010 94 */ retl ! Result = %o0
! 88 ! return (0ULL);
/* 0x0014 88 */ or %g0,0,%o0
! 89 ! }
! 90 ! exp = (x >> 52) - 1023;
! 91 ! man = (x & 0xfffffffffffffULL) | 0x10000000000000ULL;
! 92 ! x = man >> (52 - exp);
! 94 ! return (x);
.L900000206:
/* 0x0018 94 */ sllx %o5,32,%o4
/* 0x001c */ srlx %o2,52,%o0
/* 0x0020 */ sethi %hi(0x40000000),%o1
/* 0x0024 */ or %g0,1023,%g5
/* 0x0028 */ sllx %o1,22,%g4
/* 0x002c */ xor %o4,-1,%o3
/* 0x0030 */ sub %g5,%o0,%g3
/* 0x0034 */ and %o2,%o3,%g2
/* 0x0038 */ or %g2,%g4,%o5
/* 0x003c */ add %g3,52,%g1
/* 0x0040 */ retl ! Result = %o0
/* 0x0044 */ srlx %o5,%g1,%o0
/* 0x0048 0 */ .type double2uint64_t,2
/* 0x0048 0 */ .size double2uint64_t,(.-double2uint64_t)
.section ".text",#alloc,#execinstr
/* 000000 0 */ .align 8
/* 000000 */ .skip 24
/* 0x0018 */ .align 32
! 95 !}
! 96 !#else
! 97 !/*
! 98 ! * This only works if 0 <= d < 2^63
! 99 ! */
! 100 !uint64_t
! 101 !double2uint64_t(double* d)
! 102 !{
! 103 ! return ((int64_t)(*d));
! 104 !}
! 105 !#endif
! 107 !/* ARGSUSED */
! 108 !void
! 109 !conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
! 110 !{
!
! SUBROUTINE conv_d16_to_i32
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
.global conv_d16_to_i32
conv_d16_to_i32:
/* 000000 110 */ save %sp,-176,%sp
! 111 ! int i;
! 112 ! int64_t t, t1, /* using int64_t and not uint64_t */
! 113 ! a, b, c, d; /* because more efficient code is */
! 114 ! /* generated this way, and there */
! 115 ! /* is no overflow */
! 116 ! t1 = 0;
! 117 ! a = double2uint64_t(&(d16[0]));
/* 0x0004 117 */ ldx [%i1],%o0
/* 0x0008 118 */ ldx [%i1+8],%i2
/* 0x000c 117 */ cmp %o0,0
/* 0x0010 */ bne,pn %xcc,.L77000216
/* 0x0014 */ or %g0,0,%i4
.L77000215:
/* 0x0018 117 */ ba .L900000316
/* 0x001c 118 */ cmp %i2,0
.L77000216:
/* 0x0020 117 */ srlx %o0,52,%o5
/* 0x0024 */ sethi %hi(0xfff00000),%i4
/* 0x0028 */ sllx %i4,32,%o2
/* 0x002c */ sethi %hi(0x40000000),%o7
/* 0x0030 */ sllx %o7,22,%o3
/* 0x0034 */ or %g0,1023,%o4
/* 0x0038 */ xor %o2,-1,%g5
/* 0x003c */ sub %o4,%o5,%l0
/* 0x0040 */ and %o0,%g5,%o1
/* 0x0044 */ add %l0,52,%l1
/* 0x0048 */ or %o1,%o3,%g4
! 118 ! b = double2uint64_t(&(d16[1]));
/* 0x004c 118 */ cmp %i2,0
/* 0x0050 117 */ srlx %g4,%l1,%i4
.L900000316:
/* 0x0054 118 */ bne,pn %xcc,.L77000222
/* 0x0058 134 */ sub %i3,1,%l3
.L77000221:
/* 0x005c 118 */ or %g0,0,%i2
/* 0x0060 */ ba .L900000315
/* 0x0064 116 */ or %g0,0,%o3
.L77000222:
/* 0x0068 118 */ srlx %i2,52,%l6
/* 0x006c */ sethi %hi(0xfff00000),%g4
/* 0x0070 */ sllx %g4,32,%i5
/* 0x0074 */ sethi %hi(0x40000000),%l5
/* 0x0078 */ xor %i5,-1,%l4
/* 0x007c */ or %g0,1023,%l2
/* 0x0080 */ and %i2,%l4,%l7
/* 0x0084 */ sllx %l5,22,%i2
/* 0x0088 */ sub %l2,%l6,%g1
/* 0x008c */ or %l7,%i2,%g3
/* 0x0090 */ add %g1,52,%g2
/* 0x0094 116 */ or %g0,0,%o3
/* 0x0098 118 */ srlx %g3,%g2,%i2
! 119 ! for (i = 0; i < ilen - 1; i++) {
.L900000315:
/* 0x009c 119 */ cmp %l3,0
/* 0x00a0 */ ble,pn %icc,.L77000210
/* 0x00a4 */ or %g0,0,%l4
.L77000245:
/* 0x00a8 118 */ sethi %hi(0xfff00000),%l7
/* 0x00ac */ or %g0,-1,%l6
/* 0x00b0 */ sllx %l7,32,%l3
/* 0x00b4 */ srl %l6,0,%l6
/* 0x00b8 */ sethi %hi(0x40000000),%l1
/* 0x00bc */ sethi %hi(0xfc00),%l2
/* 0x00c0 */ xor %l3,-1,%l7
/* 0x00c4 */ sllx %l1,22,%l3
/* 0x00c8 */ sub %i3,2,%l5
/* 0x00cc */ add %l2,1023,%l2
/* 0x00d0 */ or %g0,2,%g2
/* 0x00d4 */ or %g0,%i0,%g1
! 120 ! c = double2uint64_t(&(d16[2 * i + 2]));
.L77000208:
/* 0x00d8 120 */ sra %g2,0,%g3
/* 0x00dc 123 */ add %g2,1,%o2
/* 0x00e0 120 */ sllx %g3,3,%i3
! 121 ! t1 += a & 0xffffffff;
! 122 ! t = (a >> 32);
! 123 ! d = double2uint64_t(&(d16[2 * i + 3]));
/* 0x00e4 123 */ sra %o2,0,%g5
/* 0x00e8 120 */ ldx [%i1+%i3],%o5
/* 0x00ec 123 */ sllx %g5,3,%o0
/* 0x00f0 121 */ and %i4,%l6,%g4
/* 0x00f4 123 */ ldx [%i1+%o0],%i3
/* 0x00f8 120 */ cmp %o5,0
/* 0x00fc */ bne,pn %xcc,.L77000228
/* 0x0100 124 */ and %i2,%l2,%i5
.L77000227:
/* 0x0104 120 */ or %g0,0,%l1
/* 0x0108 */ ba .L900000314
/* 0x010c 121 */ add %o3,%g4,%o0
.L77000228:
/* 0x0110 120 */ srlx %o5,52,%o7
/* 0x0114 */ and %o5,%l7,%o5
/* 0x0118 */ or %g0,52,%l0
/* 0x011c */ sub %o7,1023,%o4
/* 0x0120 */ or %o5,%l3,%l1
/* 0x0124 */ sub %l0,%o4,%o1
/* 0x0128 */ srlx %l1,%o1,%l1
/* 0x012c 121 */ add %o3,%g4,%o0
.L900000314:
/* 0x0130 122 */ srax %i4,32,%g3
/* 0x0134 123 */ cmp %i3,0
/* 0x0138 */ bne,pn %xcc,.L77000234
/* 0x013c 124 */ sllx %i5,16,%g5
.L77000233:
/* 0x0140 123 */ or %g0,0,%o2
/* 0x0144 */ ba .L900000313
/* 0x0148 124 */ add %o0,%g5,%o7
.L77000234:
/* 0x014c 123 */ srlx %i3,52,%o2
/* 0x0150 */ and %i3,%l7,%i4
/* 0x0154 */ sub %o2,1023,%o1
/* 0x0158 */ or %g0,52,%g4
/* 0x015c */ sub %g4,%o1,%i5
/* 0x0160 */ or %i4,%l3,%i3
/* 0x0164 */ srlx %i3,%i5,%o2
! 124 ! t1 += (b & 0xffff) << 16;
/* 0x0168 124 */ add %o0,%g5,%o7
! 125 ! t += (b >> 16) + (t1 >> 32);
.L900000313:
/* 0x016c 125 */ srax %i2,16,%l0
/* 0x0170 */ srax %o7,32,%o4
/* 0x0174 */ add %l0,%o4,%o3
! 126 ! i32[i] = t1 & 0xffffffff;
! 127 ! t1 = t;
! 128 ! a = c;
! 129 ! b = d;
/* 0x0178 129 */ add %l4,1,%l4
/* 0x017c 126 */ and %o7,%l6,%o5
/* 0x0180 125 */ add %g3,%o3,%o3
/* 0x0184 126 */ st %o5,[%g1]
/* 0x0188 128 */ or %g0,%l1,%i4
/* 0x018c 129 */ or %g0,%o2,%i2
/* 0x0190 */ add %g2,2,%g2
/* 0x0194 */ cmp %l4,%l5
/* 0x0198 */ ble,pt %icc,.L77000208
/* 0x019c */ add %g1,4,%g1
! 130 ! }
! 131 ! t1 += a & 0xffffffff;
! 132 ! t = (a >> 32);
! 133 ! t1 += (b & 0xffff) << 16;
! 134 ! i32[i] = t1 & 0xffffffff;
.L77000210:
/* 0x01a0 134 */ sra %l4,0,%l4
/* 0x01a4 */ sethi %hi(0xfc00),%i1
/* 0x01a8 */ add %o3,%i4,%l2
/* 0x01ac */ add %i1,1023,%i5
/* 0x01b0 */ and %i2,%i5,%l5
/* 0x01b4 */ sllx %l4,2,%i2
/* 0x01b8 */ sllx %l5,16,%l6
/* 0x01bc */ add %l2,%l6,%l7
/* 0x01c0 */ st %l7,[%i0+%i2]
/* 0x01c4 129 */ ret ! Result =
/* 0x01c8 */ restore %g0,%g0,%g0
/* 0x01cc 0 */ .type conv_d16_to_i32,2
/* 0x01cc 0 */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
.section ".text",#alloc,#execinstr
/* 000000 0 */ .align 8
!
! CONSTANT POOL
!
___const_seg_900000401:
/* 000000 0 */ .word 1127219200,0
/* 0x0008 */ .word 1127219200
/* 0x000c 0 */ .type ___const_seg_900000401,1
/* 0x000c 0 */ .size ___const_seg_900000401,(.-___const_seg_900000401)
/* 0x000c 0 */ .align 8
/* 0x0010 */ .skip 24
/* 0x0028 */ .align 32
! 135 !}
! 138 !void
! 139 !conv_i32_to_d32(double *d32, uint32_t *i32, int len)
! 140 !{
!
! SUBROUTINE conv_i32_to_d32
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
.global conv_i32_to_d32
conv_i32_to_d32:
/* 000000 140 */ orcc %g0,%o2,%o2
! 141 ! int i;
! 143 !#pragma pipeloop(0)
! 144 ! for (i = 0; i < len; i++)
/* 0x0004 144 */ ble,pn %icc,.L77000254
/* 0x0008 */ sub %o2,1,%o3
.L77000263:
/* 0x000c 140 */ or %g0,%o0,%o2
! 145 ! d32[i] = (double)(i32[i]);
/* 0x0010 145 */ add %o3,1,%o5
/* 0x0014 144 */ or %g0,0,%g5
/* 0x0018 145 */ cmp %o5,10
/* 0x001c */ bl,pn %icc,.L77000261
/* 0x0020 */ sethi %hi(___const_seg_900000401),%g4
.L900000407:
/* 0x0024 145 */ prefetch [%o1],0
/* 0x0028 */ prefetch [%o0],22
/* 0x002c */ sethi %hi(___const_seg_900000401+8),%o4
/* 0x0030 */ or %g0,%o0,%o2
/* 0x0034 */ prefetch [%o1+64],0
/* 0x0038 */ add %o1,8,%o0
/* 0x003c */ sub %o3,7,%o5
/* 0x0040 */ prefetch [%o2+64],22
/* 0x0044 */ or %g0,2,%g5
/* 0x0048 */ prefetch [%o2+128],22
/* 0x004c */ prefetch [%o2+192],22
/* 0x0050 */ prefetch [%o1+128],0
/* 0x0054 */ ld [%o4+%lo(___const_seg_900000401+8)],%f2
/* 0x0058 */ ldd [%g4+%lo(___const_seg_900000401)],%f16
/* 0x005c */ fmovs %f2,%f0
/* 0x0060 */ prefetch [%o2+256],22
/* 0x0064 */ prefetch [%o2+320],22
/* 0x0068 */ ld [%o1],%f3
/* 0x006c */ prefetch [%o1+192],0
/* 0x0070 */ ld [%o1+4],%f1
.L900000405:
/* 0x0074 145 */ prefetch [%o0+188],0
/* 0x0078 */ fsubd %f2,%f16,%f22
/* 0x007c */ add %g5,8,%g5
/* 0x0080 */ add %o0,32,%o0
/* 0x0084 */ ld [%o4+%lo(___const_seg_900000401+8)],%f4
/* 0x0088 */ std %f22,[%o2]
/* 0x008c */ cmp %g5,%o5
/* 0x0090 */ ld [%o0-32],%f5
/* 0x0094 */ fsubd %f0,%f16,%f24
/* 0x0098 */ add %o2,64,%o2
/* 0x009c */ fmovs %f4,%f0
/* 0x00a0 */ std %f24,[%o2-56]
/* 0x00a4 */ ld [%o0-28],%f1
/* 0x00a8 */ fsubd %f4,%f16,%f26
/* 0x00ac */ fmovs %f0,%f6
/* 0x00b0 */ prefetch [%o2+312],22
/* 0x00b4 */ std %f26,[%o2-48]
/* 0x00b8 */ ld [%o0-24],%f7
/* 0x00bc */ fsubd %f0,%f16,%f28
/* 0x00c0 */ fmovs %f6,%f8
/* 0x00c4 */ std %f28,[%o2-40]
/* 0x00c8 */ ld [%o0-20],%f9
/* 0x00cc */ fsubd %f6,%f16,%f30
/* 0x00d0 */ fmovs %f8,%f10
/* 0x00d4 */ std %f30,[%o2-32]
/* 0x00d8 */ ld [%o0-16],%f11
/* 0x00dc */ prefetch [%o2+344],22
/* 0x00e0 */ fsubd %f8,%f16,%f48
/* 0x00e4 */ fmovs %f10,%f12
/* 0x00e8 */ std %f48,[%o2-24]
/* 0x00ec */ ld [%o0-12],%f13
/* 0x00f0 */ fsubd %f10,%f16,%f50
/* 0x00f4 */ fmovs %f12,%f2
/* 0x00f8 */ std %f50,[%o2-16]
/* 0x00fc */ ld [%o0-8],%f3
/* 0x0100 */ fsubd %f12,%f16,%f52
/* 0x0104 */ fmovs %f2,%f0
/* 0x0108 */ std %f52,[%o2-8]
/* 0x010c */ ble,pt %icc,.L900000405
/* 0x0110 */ ld [%o0-4],%f1
.L900000408:
/* 0x0114 145 */ fsubd %f2,%f16,%f18
/* 0x0118 */ add %o2,16,%o2
/* 0x011c */ cmp %g5,%o3
/* 0x0120 */ std %f18,[%o2-16]
/* 0x0124 */ fsubd %f0,%f16,%f20
/* 0x0128 */ or %g0,%o0,%o1
/* 0x012c */ bg,pn %icc,.L77000254
/* 0x0130 */ std %f20,[%o2-8]
.L77000261:
/* 0x0134 145 */ ld [%o1],%f15
.L900000409:
/* 0x0138 145 */ sethi %hi(___const_seg_900000401+8),%o4
/* 0x013c */ ldd [%g4+%lo(___const_seg_900000401)],%f16
/* 0x0140 */ add %g5,1,%g5
/* 0x0144 */ ld [%o4+%lo(___const_seg_900000401+8)],%f14
/* 0x0148 */ add %o1,4,%o1
/* 0x014c */ cmp %g5,%o3
/* 0x0150 */ fsubd %f14,%f16,%f54
/* 0x0154 */ std %f54,[%o2]
/* 0x0158 */ add %o2,8,%o2
/* 0x015c */ ble,a,pt %icc,.L900000409
/* 0x0160 */ ld [%o1],%f15
.L77000254:
/* 0x0164 145 */ retl ! Result =
/* 0x0168 */ nop
/* 0x016c 0 */ .type conv_i32_to_d32,2
/* 0x016c 0 */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
.section ".text",#alloc,#execinstr
/* 000000 0 */ .align 8
!
! CONSTANT POOL
!
___const_seg_900000501:
/* 000000 0 */ .word 1127219200,0
/* 0x0008 */ .word 1127219200
/* 0x000c 0 */ .type ___const_seg_900000501,1
/* 0x000c 0 */ .size ___const_seg_900000501,(.-___const_seg_900000501)
/* 0x000c 0 */ .align 8
/* 0x0010 */ .skip 24
/* 0x0028 */ .align 32
! 146 !}
! 149 !void
! 150 !conv_i32_to_d16(double *d16, uint32_t *i32, int len)
! 151 !{
!
! SUBROUTINE conv_i32_to_d16
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
.global conv_i32_to_d16
conv_i32_to_d16:
/* 000000 151 */ save %sp,-368,%sp
/* 0x0004 */ orcc %g0,%i2,%i2
! 152 ! int i;
! 153 ! uint32_t a;
! 155 !#pragma pipeloop(0)
! 156 ! for (i = 0; i < len; i++) {
/* 0x0008 156 */ ble,pn %icc,.L77000272
/* 0x000c */ sub %i2,1,%l6
.L77000281:
/* 0x0010 156 */ sethi %hi(0xfc00),%i3
! 157 ! a = i32[i];
/* 0x0014 157 */ or %g0,%i2,%l1
/* 0x0018 156 */ add %i3,1023,%i4
/* 0x001c 157 */ cmp %i2,4
/* 0x0020 151 */ or %g0,%i1,%l7
/* 0x0024 */ or %g0,%i0,%i2
/* 0x0028 156 */ or %g0,0,%i5
/* 0x002c */ or %g0,0,%i3
/* 0x0030 157 */ bl,pn %icc,.L77000279
/* 0x0034 0 */ sethi %hi(___const_seg_900000501),%i1
.L900000508:
/* 0x0038 157 */ prefetch [%i0+8],22
/* 0x003c */ prefetch [%i0+72],22
/* 0x0040 */ or %g0,%i0,%l2
! 158 ! d16[2 * i] = (double)(a & 0xffff);
/* 0x0044 158 */ sethi %hi(___const_seg_900000501+8),%l1
/* 0x0048 157 */ prefetch [%i0+136],22
/* 0x004c */ sub %l6,1,%i0
/* 0x0050 */ or %g0,0,%i3
/* 0x0054 */ prefetch [%i2+200],22
/* 0x0058 */ or %g0,2,%i5
/* 0x005c */ prefetch [%i2+264],22
/* 0x0060 */ prefetch [%i2+328],22
/* 0x0064 */ prefetch [%i2+392],22
/* 0x0068 */ ld [%l7],%l3
/* 0x006c */ ld [%l7+4],%l4
/* 0x0070 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20
! 159 ! d16[2 * i + 1] = (double)(a >> 16);
/* 0x0074 159 */ srl %l3,16,%o1
/* 0x0078 158 */ and %l3,%i4,%o3
/* 0x007c */ st %o3,[%sp+2335]
/* 0x0080 159 */ srl %l4,16,%g4
/* 0x0084 158 */ and %l4,%i4,%o0
/* 0x0088 */ st %o0,[%sp+2303]
/* 0x008c 159 */ add %l7,8,%l7
/* 0x0090 */ st %o1,[%sp+2271]
/* 0x0094 */ st %g4,[%sp+2239]
/* 0x0098 157 */ prefetch [%i2+456],22
/* 0x009c */ prefetch [%i2+520],22
.L900000506:
/* 0x00a0 157 */ prefetch [%l2+536],22
/* 0x00a4 159 */ add %i5,2,%i5
/* 0x00a8 157 */ add %l2,32,%l2
/* 0x00ac */ ld [%l7],%g2
/* 0x00b0 159 */ cmp %i5,%i0
/* 0x00b4 */ add %l7,8,%l7
/* 0x00b8 158 */ ld [%sp+2335],%f9
/* 0x00bc 159 */ add %i3,4,%i3
/* 0x00c0 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f8
/* 0x00c4 159 */ ld [%sp+2271],%f11
/* 0x00c8 158 */ and %g2,%i4,%g3
/* 0x00cc 159 */ fmovs %f8,%f10
/* 0x00d0 158 */ st %g3,[%sp+2335]
/* 0x00d4 */ fsubd %f8,%f20,%f28
/* 0x00d8 */ std %f28,[%l2-32]
/* 0x00dc 159 */ srl %g2,16,%g1
/* 0x00e0 */ st %g1,[%sp+2271]
/* 0x00e4 */ fsubd %f10,%f20,%f30
/* 0x00e8 */ std %f30,[%l2-24]
/* 0x00ec 157 */ ld [%l7-4],%l0
/* 0x00f0 158 */ ld [%sp+2303],%f13
/* 0x00f4 */ ld [%l1+%lo(___const_seg_900000501+8)],%f12
/* 0x00f8 159 */ ld [%sp+2239],%f15
/* 0x00fc 158 */ and %l0,%i4,%l5
/* 0x0100 159 */ fmovs %f12,%f14
/* 0x0104 158 */ st %l5,[%sp+2303]
/* 0x0108 */ fsubd %f12,%f20,%f44
/* 0x010c */ std %f44,[%l2-16]
/* 0x0110 159 */ srl %l0,16,%o5
/* 0x0114 */ st %o5,[%sp+2239]
/* 0x0118 */ fsubd %f14,%f20,%f46
/* 0x011c */ ble,pt %icc,.L900000506
/* 0x0120 */ std %f46,[%l2-8]
.L900000509:
/* 0x0124 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f0
/* 0x0128 159 */ cmp %i5,%l6
/* 0x012c */ add %i3,4,%i3
/* 0x0130 158 */ ld [%sp+2335],%f1
/* 0x0134 */ ld [%sp+2303],%f5
/* 0x0138 159 */ fmovs %f0,%f2
/* 0x013c */ ld [%sp+2271],%f3
/* 0x0140 158 */ fmovs %f0,%f4
/* 0x0144 159 */ ld [%sp+2239],%f7
/* 0x0148 */ fmovs %f0,%f6
/* 0x014c 158 */ fsubd %f0,%f20,%f22
/* 0x0150 */ std %f22,[%l2]
/* 0x0154 159 */ fsubd %f2,%f20,%f24
/* 0x0158 */ std %f24,[%l2+8]
/* 0x015c 158 */ fsubd %f4,%f20,%f26
/* 0x0160 */ std %f26,[%l2+16]
/* 0x0164 159 */ fsubd %f6,%f20,%f20
/* 0x0168 */ bg,pn %icc,.L77000272
/* 0x016c */ std %f20,[%l2+24]
.L77000279:
/* 0x0170 157 */ ld [%l7],%l2
.L900000510:
/* 0x0174 158 */ and %l2,%i4,%o4
/* 0x0178 */ st %o4,[%sp+2399]
/* 0x017c 159 */ srl %l2,16,%o2
/* 0x0180 */ st %o2,[%sp+2367]
/* 0x0184 158 */ sethi %hi(___const_seg_900000501+8),%l1
/* 0x0188 */ sra %i3,0,%i0
/* 0x018c */ ld [%l1+%lo(___const_seg_900000501+8)],%f16
/* 0x0190 */ sllx %i0,3,%o1
/* 0x0194 159 */ add %i3,1,%o3
/* 0x0198 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20
/* 0x019c 159 */ sra %o3,0,%l3
/* 0x01a0 */ add %i5,1,%i5
/* 0x01a4 158 */ ld [%sp+2399],%f17
/* 0x01a8 159 */ sllx %l3,3,%o0
/* 0x01ac */ add %l7,4,%l7
/* 0x01b0 */ fmovs %f16,%f18
/* 0x01b4 */ cmp %i5,%l6
/* 0x01b8 */ add %i3,2,%i3
/* 0x01bc 158 */ fsubd %f16,%f20,%f48
/* 0x01c0 */ std %f48,[%i2+%o1]
/* 0x01c4 159 */ ld [%sp+2367],%f19
/* 0x01c8 */ fsubd %f18,%f20,%f50
/* 0x01cc */ std %f50,[%i2+%o0]
/* 0x01d0 */ ble,a,pt %icc,.L900000510
/* 0x01d4 157 */ ld [%l7],%l2
.L77000272:
/* 0x01d8 159 */ ret ! Result =
/* 0x01dc */ restore %g0,%g0,%g0
/* 0x01e0 0 */ .type conv_i32_to_d16,2
/* 0x01e0 0 */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
.section ".text",#alloc,#execinstr
/* 000000 0 */ .align 8
!
! CONSTANT POOL
!
___const_seg_900000601:
/* 000000 0 */ .word 1127219200,0
/* 0x0008 */ .word 1127219200
/* 0x000c 0 */ .type ___const_seg_900000601,1
/* 0x000c 0 */ .size ___const_seg_900000601,(.-___const_seg_900000601)
/* 0x000c 0 */ .align 8
/* 0x0010 */ .skip 24
/* 0x0028 */ .align 32
! 160 ! }
! 161 !}
! 163 !#ifdef RF_INLINE_MACROS
! 165 !void
! 166 !i16_to_d16_and_d32x4(const double *, /* 1/(2^16) */
! 167 ! const double *, /* 2^16 */
! 168 ! const double *, /* 0 */
! 169 ! double *, /* result16 */
! 170 ! double *, /* result32 */
! 171 ! float *); /* source - should be unsigned int* */
! 172 ! /* converted to float* */
! 174 !#else
! 177 !/* ARGSUSED */
! 178 !static void
! 179 !i16_to_d16_and_d32x4(const double *dummy1, /* 1/(2^16) */
! 180 ! const double *dummy2, /* 2^16 */
! 181 ! const double *dummy3, /* 0 */
! 182 ! double *result16,
! 183 ! double *result32,
! 184 ! float *src) /* source - should be unsigned int* */
! 185 ! /* converted to float* */
! 186 !{
! 187 ! uint32_t *i32;
! 188 ! uint32_t a, b, c, d;
! 190 ! i32 = (uint32_t *)src;
! 191 ! a = i32[0];
! 192 ! b = i32[1];
! 193 ! c = i32[2];
! 194 ! d = i32[3];
! 195 ! result16[0] = (double)(a & 0xffff);
! 196 ! result16[1] = (double)(a >> 16);
! 197 ! result32[0] = (double)a;
! 198 ! result16[2] = (double)(b & 0xffff);
! 199 ! result16[3] = (double)(b >> 16);
! 200 ! result32[1] = (double)b;
! 201 ! result16[4] = (double)(c & 0xffff);
! 202 ! result16[5] = (double)(c >> 16);
! 203 ! result32[2] = (double)c;
! 204 ! result16[6] = (double)(d & 0xffff);
! 205 ! result16[7] = (double)(d >> 16);
! 206 ! result32[3] = (double)d;
! 207 !}
! 209 !#endif
! 212 !void
! 213 !conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len)
! 214 !{
!
! SUBROUTINE conv_i32_to_d32_and_d16
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
.global conv_i32_to_d32_and_d16
conv_i32_to_d32_and_d16:
/* 000000 214 */ save %sp,-368,%sp
! 215 ! int i;
! 216 ! uint32_t a;
! 218 !#pragma pipeloop(0)
! 219 ! for (i = 0; i < len - 3; i += 4) {
! 220 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
! 221 ! &(d16[2*i]), &(d32[i]),
! 222 ! (float *)(&(i32[i])));
! 223 ! }
! 224 ! for (; i < len; i++) {
! 225 ! a = i32[i];
! 226 ! d32[i] = (double)(i32[i]);
! 227 ! d16[2 * i] = (double)(a & 0xffff);
! 228 ! d16[2 * i + 1] = (double)(a >> 16);
/* 0x0004 228 */ sub %i3,3,%i4
/* 0x0008 219 */ cmp %i4,0
/* 0x000c */ ble,pn %icc,.L77000289
/* 0x0010 */ or %g0,0,%i5
.L77000306:
/* 0x0014 222 */ sethi %hi(Zero),%g3
/* 0x0018 */ sethi %hi(TwoToMinus16),%g2
/* 0x001c */ sethi %hi(TwoTo16),%o5
/* 0x0020 */ ldd [%g3+%lo(Zero)],%f2
/* 0x0024 219 */ sub %i3,4,%o4
/* 0x0028 */ or %g0,0,%o3
/* 0x002c */ or %g0,%i0,%l6
/* 0x0030 */ or %g0,%i2,%l5
.L900000615:
/* 0x0034 222 */ fmovd %f2,%f26
/* 0x0038 */ ld [%l5],%f27
/* 0x003c */ sra %o3,0,%o0
/* 0x0040 */ add %i5,4,%i5
/* 0x0044 */ fmovd %f2,%f28
/* 0x0048 */ ld [%l5+4],%f29
/* 0x004c */ sllx %o0,3,%g5
/* 0x0050 */ cmp %i5,%o4
/* 0x0054 */ fmovd %f2,%f30
/* 0x0058 */ ld [%l5+8],%f31
/* 0x005c */ add %i1,%g5,%g4
/* 0x0060 */ add %o3,8,%o3
/* 0x0064 */ ld [%l5+12],%f3
/* 0x0068 */ fxtod %f26,%f26
/* 0x006c */ ldd [%g2+%lo(TwoToMinus16)],%f32
/* 0x0070 */ fxtod %f28,%f28
/* 0x0074 */ add %l5,16,%l5
/* 0x0078 */ fxtod %f30,%f30
/* 0x007c */ ldd [%o5+%lo(TwoTo16)],%f34
/* 0x0080 */ fxtod %f2,%f2
/* 0x0084 */ std %f2,[%l6+24]
/* 0x0088 */ fmuld %f32,%f26,%f36
/* 0x008c */ std %f26,[%l6]
/* 0x0090 */ fmuld %f32,%f28,%f38
/* 0x0094 */ std %f28,[%l6+8]
/* 0x0098 */ fmuld %f32,%f30,%f40
/* 0x009c */ std %f30,[%l6+16]
/* 0x00a0 */ fmuld %f32,%f2,%f42
/* 0x00a4 */ add %l6,32,%l6
/* 0x00a8 */ fdtox %f36,%f36
/* 0x00ac */ fdtox %f38,%f38
/* 0x00b0 */ fdtox %f40,%f40
/* 0x00b4 */ fdtox %f42,%f42
/* 0x00b8 */ fxtod %f36,%f36
/* 0x00bc */ std %f36,[%g4+8]
/* 0x00c0 */ fxtod %f38,%f38
/* 0x00c4 */ std %f38,[%g4+24]
/* 0x00c8 */ fxtod %f40,%f40
/* 0x00cc */ std %f40,[%g4+40]
/* 0x00d0 */ fxtod %f42,%f42
/* 0x00d4 */ std %f42,[%g4+56]
/* 0x00d8 */ fmuld %f36,%f34,%f36
/* 0x00dc */ fmuld %f38,%f34,%f38
/* 0x00e0 */ fmuld %f40,%f34,%f40
/* 0x00e4 */ fmuld %f42,%f34,%f42
/* 0x00e8 */ fsubd %f26,%f36,%f36
/* 0x00ec */ std %f36,[%i1+%g5]
/* 0x00f0 */ fsubd %f28,%f38,%f38
/* 0x00f4 */ std %f38,[%g4+16]
/* 0x00f8 */ fsubd %f30,%f40,%f40
/* 0x00fc */ std %f40,[%g4+32]
/* 0x0100 */ fsubd %f2,%f42,%f42
/* 0x0104 */ std %f42,[%g4+48]
/* 0x0108 */ ble,a,pt %icc,.L900000615
/* 0x010c */ ldd [%g3+%lo(Zero)],%f2
.L77000289:
/* 0x0110 224 */ cmp %i5,%i3
/* 0x0114 */ bge,pn %icc,.L77000294
/* 0x0118 */ sethi %hi(0xfc00),%l0
.L77000307:
/* 0x011c 224 */ sra %i5,0,%l2
/* 0x0120 */ sll %i5,1,%i4
/* 0x0124 */ sllx %l2,3,%l1
/* 0x0128 */ sllx %l2,2,%o1
/* 0x012c 225 */ sub %i3,%i5,%l3
/* 0x0130 224 */ add %l0,1023,%l0
/* 0x0134 */ add %l1,%i0,%l1
/* 0x0138 */ add %o1,%i2,%i2
/* 0x013c 225 */ cmp %l3,5
/* 0x0140 */ bl,pn %icc,.L77000291
/* 0x0144 0 */ sethi %hi(___const_seg_900000601),%l7
.L900000612:
/* 0x0148 225 */ prefetch [%l1],22
/* 0x014c */ prefetch [%l1+64],22
/* 0x0150 */ sra %i4,0,%l6
/* 0x0154 226 */ sethi %hi(___const_seg_900000601+8),%l2
/* 0x0158 225 */ prefetch [%l1+128],22
/* 0x015c */ add %l6,-2,%l5
/* 0x0160 */ sub %i3,3,%i0
/* 0x0164 */ prefetch [%l1+192],22
/* 0x0168 */ sllx %l5,3,%o4
/* 0x016c 228 */ add %i5,1,%i5
/* 0x0170 225 */ add %i1,%o4,%o3
/* 0x0174 */ or %g0,%i3,%g1
/* 0x0178 */ ld [%i2],%l4
/* 0x017c */ prefetch [%o3+16],22
/* 0x0180 */ add %o3,16,%l3
/* 0x0184 228 */ add %i2,4,%i2
/* 0x0188 225 */ prefetch [%o3+80],22
/* 0x018c 228 */ srl %l4,16,%o1
/* 0x0190 227 */ and %l4,%l0,%o0
/* 0x0194 225 */ prefetch [%o3+144],22
/* 0x0198 228 */ st %o1,[%sp+2271]
/* 0x019c 227 */ st %o0,[%sp+2239]
/* 0x01a0 226 */ ldd [%l7+%lo(___const_seg_900000601)],%f32
/* 0x01a4 228 */ ld [%l2+%lo(___const_seg_900000601+8)],%f0
/* 0x01a8 225 */ prefetch [%o3+208],22
/* 0x01ac */ prefetch [%o3+272],22
/* 0x01b0 */ prefetch [%o3+336],22
.L900000610:
/* 0x01b4 225 */ prefetch [%l1+192],22
/* 0x01b8 228 */ add %i5,4,%i5
/* 0x01bc 225 */ add %l3,64,%l3
/* 0x01c0 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f8
/* 0x01c4 228 */ cmp %i5,%i0
/* 0x01c8 225 */ ld [%i2],%g5
/* 0x01cc 228 */ add %i2,16,%i2
/* 0x01d0 */ add %l1,32,%l1
/* 0x01d4 */ add %i4,8,%i4
/* 0x01d8 226 */ ld [%i2-20],%f7
/* 0x01dc 228 */ srl %g5,16,%i3
/* 0x01e0 226 */ fmovs %f8,%f6
/* 0x01e4 228 */ st %i3,[%sp+2335]
/* 0x01e8 227 */ and %g5,%l0,%g4
/* 0x01ec */ st %g4,[%sp+2303]
/* 0x01f0 226 */ fsubd %f6,%f32,%f40
/* 0x01f4 227 */ ld [%sp+2239],%f9
/* 0x01f8 228 */ ld [%sp+2271],%f1
/* 0x01fc */ fmovs %f8,%f12
/* 0x0200 226 */ std %f40,[%l1-32]
/* 0x0204 227 */ fsubd %f8,%f32,%f42
/* 0x0208 */ std %f42,[%l3-64]
/* 0x020c 228 */ fsubd %f0,%f32,%f44
/* 0x0210 */ std %f44,[%l3-56]
/* 0x0214 227 */ fmovs %f12,%f10
/* 0x0218 225 */ ld [%i2-12],%g2
/* 0x021c 226 */ ld [%i2-16],%f1
/* 0x0220 228 */ srl %g2,16,%g3
/* 0x0224 226 */ fmovs %f12,%f0
/* 0x0228 225 */ prefetch [%l3+320],22
/* 0x022c 228 */ st %g3,[%sp+2271]
/* 0x0230 227 */ and %g2,%l0,%l6
/* 0x0234 */ st %l6,[%sp+2239]
/* 0x0238 226 */ fsubd %f0,%f32,%f46
/* 0x023c 227 */ ld [%sp+2303],%f11
/* 0x0240 228 */ ld [%sp+2335],%f13
/* 0x0244 */ fmovs %f12,%f18
/* 0x0248 226 */ std %f46,[%l1-24]
/* 0x024c 227 */ fsubd %f10,%f32,%f48
/* 0x0250 */ std %f48,[%l3-48]
/* 0x0254 228 */ fsubd %f12,%f32,%f50
/* 0x0258 */ std %f50,[%l3-40]
/* 0x025c 227 */ fmovs %f18,%f16
/* 0x0260 225 */ ld [%i2-8],%o5
/* 0x0264 226 */ ld [%i2-12],%f15
/* 0x0268 228 */ srl %o5,16,%l5
/* 0x026c 226 */ fmovs %f18,%f14
/* 0x0270 228 */ st %l5,[%sp+2335]
/* 0x0274 227 */ and %o5,%l0,%o4
/* 0x0278 */ st %o4,[%sp+2303]
/* 0x027c 226 */ fsubd %f14,%f32,%f52
/* 0x0280 227 */ ld [%sp+2239],%f17
/* 0x0284 228 */ ld [%sp+2271],%f19
/* 0x0288 225 */ prefetch [%l3+352],22
/* 0x028c 228 */ fmovs %f18,%f24
/* 0x0290 226 */ std %f52,[%l1-16]
/* 0x0294 227 */ fsubd %f16,%f32,%f54
/* 0x0298 */ std %f54,[%l3-32]
/* 0x029c 228 */ fsubd %f18,%f32,%f56
/* 0x02a0 */ std %f56,[%l3-24]
/* 0x02a4 227 */ fmovs %f24,%f22
/* 0x02a8 225 */ ld [%i2-4],%l4
/* 0x02ac 226 */ ld [%i2-8],%f21
/* 0x02b0 228 */ srl %l4,16,%o3
/* 0x02b4 226 */ fmovs %f24,%f20
/* 0x02b8 228 */ st %o3,[%sp+2271]
/* 0x02bc 227 */ and %l4,%l0,%o2
/* 0x02c0 */ st %o2,[%sp+2239]
/* 0x02c4 226 */ fsubd %f20,%f32,%f58
/* 0x02c8 227 */ ld [%sp+2303],%f23
/* 0x02cc 228 */ ld [%sp+2335],%f25
/* 0x02d0 */ fmovs %f24,%f0
/* 0x02d4 226 */ std %f58,[%l1-8]
/* 0x02d8 227 */ fsubd %f22,%f32,%f60
/* 0x02dc */ std %f60,[%l3-16]
/* 0x02e0 228 */ fsubd %f24,%f32,%f62
/* 0x02e4 */ bl,pt %icc,.L900000610
/* 0x02e8 */ std %f62,[%l3-8]
.L900000613:
/* 0x02ec 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4
/* 0x02f0 228 */ add %l1,8,%l1
/* 0x02f4 */ cmp %i5,%g1
/* 0x02f8 226 */ ld [%i2-4],%f3
/* 0x02fc 225 */ or %g0,%g1,%i3
/* 0x0300 228 */ add %i4,2,%i4
/* 0x0304 227 */ ld [%sp+2239],%f5
/* 0x0308 226 */ fmovs %f4,%f2
/* 0x030c 228 */ ld [%sp+2271],%f1
/* 0x0310 226 */ fsubd %f2,%f32,%f34
/* 0x0314 */ std %f34,[%l1-8]
/* 0x0318 227 */ fsubd %f4,%f32,%f36
/* 0x031c */ std %f36,[%l3]
/* 0x0320 228 */ fsubd %f0,%f32,%f38
/* 0x0324 */ bge,pn %icc,.L77000294
/* 0x0328 */ std %f38,[%l3+8]
.L77000291:
/* 0x032c 225 */ ld [%i2],%o2
.L900000614:
/* 0x0330 226 */ ldd [%l7+%lo(___const_seg_900000601)],%f32
/* 0x0334 228 */ srl %o2,16,%l3
/* 0x0338 227 */ sra %i4,0,%i0
/* 0x033c 228 */ st %l3,[%sp+2367]
/* 0x0340 227 */ and %o2,%l0,%g1
/* 0x0344 226 */ sethi %hi(___const_seg_900000601+8),%l2
/* 0x0348 227 */ st %g1,[%sp+2399]
/* 0x034c */ sllx %i0,3,%o0
/* 0x0350 228 */ add %i4,1,%l4
/* 0x0354 226 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4
/* 0x0358 228 */ sra %l4,0,%o1
/* 0x035c */ add %i5,1,%i5
/* 0x0360 226 */ ld [%i2],%f5
/* 0x0364 228 */ sllx %o1,3,%g5
/* 0x0368 */ cmp %i5,%i3
/* 0x036c */ ld [%sp+2367],%f9
/* 0x0370 */ add %i2,4,%i2
/* 0x0374 */ add %i4,2,%i4
/* 0x0378 227 */ fmovs %f4,%f6
/* 0x037c 226 */ fsubd %f4,%f32,%f44
/* 0x0380 */ std %f44,[%l1]
/* 0x0384 227 */ ld [%sp+2399],%f7
/* 0x0388 228 */ fmovs %f6,%f8
/* 0x038c */ add %l1,8,%l1
/* 0x0390 */ fsubd %f8,%f32,%f48
/* 0x0394 227 */ fsubd %f6,%f32,%f46
/* 0x0398 */ std %f46,[%i1+%o0]
/* 0x039c 228 */ std %f48,[%i1+%g5]
/* 0x03a0 */ bl,a,pt %icc,.L900000614
/* 0x03a4 225 */ ld [%i2],%o2
.L77000294:
/* 0x03a8 222 */ ret ! Result =
/* 0x03ac */ restore %g0,%g0,%g0
/* 0x03b0 0 */ .type conv_i32_to_d32_and_d16,2
/* 0x03b0 0 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
.section ".text",#alloc,#execinstr
/* 000000 0 */ .align 32
! 229 ! }
! 230 !}
! 232 !extern long long c1, c2, c3, c4;
! 234 !static void
! 235 !adjust_montf_result(uint32_t *i32, uint32_t *nint, int len)
! 236 !{
!
! SUBROUTINE adjust_montf_result
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
adjust_montf_result:
/* 000000 236 */ sra %o2,0,%g2
/* 0x0004 */ or %g0,%o0,%o4
! 237 ! int64_t acc;
! 238 ! int i;
! 240 ! if (i32[len] > 0) {
/* 0x0008 240 */ sllx %g2,2,%g3
/* 0x000c */ ld [%o0+%g3],%o0
/* 0x0010 */ cmp %o0,0
/* 0x0014 */ bleu,pn %icc,.L77000316
/* 0x0018 236 */ or %g0,%o1,%o5
! 241 ! i = -1;
.L77000315:
/* 0x001c 241 */ sub %g2,1,%g3
/* 0x0020 */ ba .L900000712
/* 0x0024 249 */ cmp %g2,0
! 242 ! } else {
! 243 ! for (i = len - 1; i >= 0; i--) {
.L77000316:
/* 0x0028 243 */ subcc %g2,1,%g3
/* 0x002c */ bneg,pn %icc,.L77000340
/* 0x0030 */ or %g0,%g3,%o3
.L77000348:
/* 0x0034 243 */ sra %g3,0,%o1
/* 0x0038 */ sllx %o1,2,%g1
! 244 ! if (i32[i] != nint[i]) break;
/* 0x003c 244 */ ld [%g1+%o5],%g4
/* 0x0040 243 */ add %g1,%o4,%o2
/* 0x0044 */ add %g1,%o5,%o1
.L900000713:
/* 0x0048 244 */ ld [%o2],%o0
/* 0x004c */ cmp %o0,%g4
/* 0x0050 */ bne,pn %icc,.L77000324
/* 0x0054 */ sub %o2,4,%o2
.L77000320:
/* 0x0058 244 */ sub %o1,4,%o1
/* 0x005c */ subcc %o3,1,%o3
/* 0x0060 */ bpos,a,pt %icc,.L900000713
/* 0x0064 */ ld [%o1],%g4
.L900000706:
/* 0x0068 244 */ ba .L900000712
/* 0x006c 249 */ cmp %g2,0
.L77000324:
/* 0x0070 244 */ sra %o3,0,%o0
/* 0x0074 */ sllx %o0,2,%g1
/* 0x0078 */ ld [%o5+%g1],%o3
/* 0x007c */ ld [%o4+%g1],%g5
/* 0x0080 */ cmp %g5,%o3
/* 0x0084 */ bleu,pt %icc,.L77000332
/* 0x0088 */ nop
! 245 ! }
! 246 ! }
! 247 ! if ((i < 0) || (i32[i] > nint[i])) {
! 248 ! acc = 0;
! 249 ! for (i = 0; i < len; i++) {
.L77000340:
/* 0x008c 249 */ cmp %g2,0
.L900000712:
/* 0x0090 249 */ ble,pn %icc,.L77000332
/* 0x0094 250 */ or %g0,%g2,%o3
.L77000347:
/* 0x0098 249 */ or %g0,0,%o0
! 250 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
/* 0x009c 250 */ cmp %o3,10
/* 0x00a0 */ bl,pn %icc,.L77000341
/* 0x00a4 249 */ or %g0,0,%g2
.L900000709:
/* 0x00a8 250 */ prefetch [%o4],22
/* 0x00ac */ prefetch [%o4+64],22
! 251 ! i32[i] = acc & 0xffffffff;
! 252 ! acc = acc >> 32;
/* 0x00b0 252 */ add %o5,4,%o1
/* 0x00b4 */ add %o4,8,%o2
/* 0x00b8 250 */ prefetch [%o4+128],22
/* 0x00bc */ sub %o3,8,%o5
/* 0x00c0 */ or %g0,2,%o0
/* 0x00c4 */ prefetch [%o4+192],22
/* 0x00c8 */ prefetch [%o4+256],22
/* 0x00cc */ prefetch [%o4+320],22
/* 0x00d0 */ prefetch [%o4+384],22
/* 0x00d4 */ ld [%o2-4],%g5
/* 0x00d8 */ prefetch [%o2+440],22
/* 0x00dc */ prefetch [%o2+504],22
/* 0x00e0 */ ld [%o4],%g4
/* 0x00e4 */ ld [%o1-4],%o4
/* 0x00e8 */ sub %g4,%o4,%o3
/* 0x00ec 251 */ st %o3,[%o2-8]
/* 0x00f0 252 */ srax %o3,32,%g4
.L900000707:
/* 0x00f4 252 */ add %o0,8,%o0
/* 0x00f8 */ add %o2,32,%o2
/* 0x00fc 250 */ ld [%o1],%g1
/* 0x0100 */ prefetch [%o2+496],22
/* 0x0104 252 */ cmp %o0,%o5
/* 0x0108 */ add %o1,32,%o1
/* 0x010c 250 */ sub %g5,%g1,%g5
/* 0x0110 */ add %g5,%g4,%o4
/* 0x0114 */ ld [%o2-32],%g4
/* 0x0118 251 */ st %o4,[%o2-36]
/* 0x011c 252 */ srax %o4,32,%g1
/* 0x0120 250 */ ld [%o1-28],%o3
/* 0x0124 */ sub %g4,%o3,%g2
/* 0x0128 */ add %g2,%g1,%g5
/* 0x012c */ ld [%o2-28],%o3
/* 0x0130 251 */ st %g5,[%o2-32]
/* 0x0134 252 */ srax %g5,32,%g4
/* 0x0138 250 */ ld [%o1-24],%o4
/* 0x013c */ sub %o3,%o4,%g1
/* 0x0140 */ add %g1,%g4,%g2
/* 0x0144 */ ld [%o2-24],%o3
/* 0x0148 251 */ st %g2,[%o2-28]
/* 0x014c 252 */ srax %g2,32,%g5
/* 0x0150 250 */ ld [%o1-20],%o4
/* 0x0154 */ sub %o3,%o4,%g4
/* 0x0158 */ add %g4,%g5,%g1
/* 0x015c */ ld [%o2-20],%o4
/* 0x0160 251 */ st %g1,[%o2-24]
/* 0x0164 252 */ srax %g1,32,%o3
/* 0x0168 250 */ ld [%o1-16],%g2
/* 0x016c */ sub %o4,%g2,%g5
/* 0x0170 */ add %g5,%o3,%g1
/* 0x0174 */ ld [%o2-16],%g4
/* 0x0178 251 */ st %g1,[%o2-20]
/* 0x017c 252 */ srax %g1,32,%o4
/* 0x0180 250 */ ld [%o1-12],%g2
/* 0x0184 */ sub %g4,%g2,%o3
/* 0x0188 */ add %o3,%o4,%g5
/* 0x018c */ ld [%o2-12],%g2
/* 0x0190 251 */ st %g5,[%o2-16]
/* 0x0194 252 */ srax %g5,32,%g4
/* 0x0198 250 */ ld [%o1-8],%g1
/* 0x019c */ sub %g2,%g1,%o4
/* 0x01a0 */ add %o4,%g4,%o3
/* 0x01a4 */ ld [%o2-8],%g2
/* 0x01a8 251 */ st %o3,[%o2-12]
/* 0x01ac 252 */ srax %o3,32,%g5
/* 0x01b0 250 */ ld [%o1-4],%g1
/* 0x01b4 */ sub %g2,%g1,%g4
/* 0x01b8 */ add %g4,%g5,%o4
/* 0x01bc */ ld [%o2-4],%g5
/* 0x01c0 251 */ st %o4,[%o2-8]
/* 0x01c4 252 */ ble,pt %icc,.L900000707
/* 0x01c8 */ srax %o4,32,%g4
.L900000710:
/* 0x01cc 250 */ ld [%o1],%o3
/* 0x01d0 252 */ add %o1,4,%o5
/* 0x01d4 250 */ or %g0,%o2,%o4
/* 0x01d8 252 */ cmp %o0,%g3
/* 0x01dc 250 */ sub %g5,%o3,%g2
/* 0x01e0 */ add %g2,%g4,%g1
/* 0x01e4 251 */ st %g1,[%o2-4]
/* 0x01e8 252 */ bg,pn %icc,.L77000332
/* 0x01ec */ srax %g1,32,%g2
.L77000341:
/* 0x01f0 250 */ ld [%o4],%g5
.L900000711:
/* 0x01f4 250 */ ld [%o5],%o2
/* 0x01f8 */ add %g2,%g5,%g4
/* 0x01fc 252 */ add %o0,1,%o0
/* 0x0200 */ cmp %o0,%g3
/* 0x0204 */ add %o5,4,%o5
/* 0x0208 250 */ sub %g4,%o2,%o1
/* 0x020c 251 */ st %o1,[%o4]
/* 0x0210 252 */ srax %o1,32,%g2
/* 0x0214 */ add %o4,4,%o4
/* 0x0218 */ ble,a,pt %icc,.L900000711
/* 0x021c 250 */ ld [%o4],%g5
.L77000332:
/* 0x0220 252 */ retl ! Result =
/* 0x0224 */ nop
/* 0x0228 0 */ .type adjust_montf_result,2
/* 0x0228 0 */ .size adjust_montf_result,(.-adjust_montf_result)
.section ".text",#alloc,#execinstr
/* 000000 0 */ .align 32
! 253 ! }
! 254 ! }
! 255 !}
! 257 !/*************
! 258 !static void
! 259 !adjust_montf_result_bad(uint32_t *i32, uint32_t *nint, int len)
! 260 !{
! 261 ! int64_t acc;
! 262 ! int i;
! 264 ! c4++;
! 265 !
! 266 ! if (i32[len] > 0) {
! 267 ! i = -1;
! 268 ! c1++;
! 269 ! } else {
! 270 ! for (i = len - 1; i >= 0; i++) {
! 271 ! if (i32[i] != nint[i]) break;
! 272 ! c2++;
! 273 ! }
! 274 ! }
! 275 ! if ((i < 0) || (i32[i] > nint[i])) {
! 276 ! c3++;
! 277 ! acc = 0;
! 278 ! for (i = 0; i < len; i++) {
! 279 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
! 280 ! i32[i] = acc & 0xffffffff;
! 281 ! acc = acc >> 32;
! 282 ! }
! 283 ! }
! 284 !}
! 285 !uint32_t saveresult[1000];
! 286 !void printarray(char *name, uint32_t *arr, int len)
! 287 !{
! 288 ! int i, j;
! 289 ! uint64_t tmp;
! 291 ! printf("uint64_t %s[%d] =\n{\n",name,(len+1)/2);
! 292 ! for(i=j=0; i<len; i+=2,j+=2){
! 293 ! if(j == 6){
! 294 ! printf("\n");
! 295 ! j=0;
! 296 ! }
! 297 ! tmp = (((uint64_t)arr[i])<<32) | ((uint64_t)arr[i+1]);
! 298 ! printf("0x%016llx",tmp);
! 299 ! if((i/2)!=(((len+1)/2)-1))printf(",");
! 300 ! if(j!=4)printf(" ");
! 301 ! }
! 302 ! if(j!=0) printf("\n");
! 303 ! printf("};\n");
! 304 !}
! 305 !**************/
! 308 !/*
! 309 ! * the lengths of the input arrays should be at least the following:
! 310 ! * result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
! 311 ! * all of them should be different from one another
! 312 ! */
! 313 !void mont_mulf_noconv(uint32_t *result,
! 314 ! double *dm1, double *dm2, double *dt,
! 315 ! double *dn, uint32_t *nint,
! 316 ! int nlen, double dn0)
! 317 !{
!
! SUBROUTINE mont_mulf_noconv
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
.global mont_mulf_noconv
mont_mulf_noconv:
/* 000000 317 */ save %sp,-176,%sp
/* 0x0004 */ ldx [%fp+2223],%g1
/* 0x0008 0 */ sethi %hi(Zero),%l5
/* 0x000c 317 */ or %g0,%i2,%l0
! 318 ! int i, j, jj;
! 319 ! double digit, m2j, a, b;
! 320 ! double *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
! 322 ! pdm1 = &(dm1[0]);
! 323 ! pdm2 = &(dm2[0]);
! 324 ! pdn = &(dn[0]);
! 325 ! pdm2[2 * nlen] = Zero;
/* 0x0010 325 */ ldd [%l5+%lo(Zero)],%f0
/* 0x0014 317 */ or %g0,%i0,%i2
/* 0x0018 325 */ sll %g1,1,%o3
! 327 ! if (nlen != 16) {
/* 0x001c 327 */ cmp %g1,16
/* 0x0020 325 */ sra %o3,0,%i0
/* 0x0024 */ sllx %i0,3,%o0
/* 0x0028 317 */ or %g0,%i5,%i0
/* 0x002c 327 */ bne,pn %icc,.L77000476
/* 0x0030 325 */ std %f0,[%l0+%o0]
.L77000488:
/* 0x0034 0 */ sethi %hi(TwoToMinus16),%o2
/* 0x0038 0 */ sethi %hi(TwoTo16),%l3
! 328 ! for (i = 0; i < 4 * nlen + 2; i++)
! 329 ! dt[i] = Zero;
! 330 ! a = dt[0] = pdm1[0] * pdm2[0];
! 331 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
! 333 ! pdtj = &(dt[0]);
! 334 ! for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) {
! 335 ! m2j = pdm2[j];
! 336 ! a = pdtj[0] + pdn[0] * digit;
! 337 ! b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16;
! 338 ! pdtj[1] = b;
! 340 !#pragma pipeloop(0)
! 341 ! for (i = 1; i < nlen; i++) {
! 342 ! pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
! 343 ! }
! 344 ! if (jj == 15) {
! 345 ! cleanup(dt, j / 2 + 1, 2 * nlen + 1);
! 346 ! jj = 0;
! 347 ! }
! 349 ! digit = mod(lower32(b, Zero) * dn0,
! 350 ! TwoToMinus16, TwoTo16);
! 351 ! }
! 352 ! } else {
! 353 ! a = dt[0] = pdm1[0] * pdm2[0];
/* 0x003c 353 */ ldd [%i1],%f40
! 355 ! dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] =
! 356 ! dt[59] = dt[58] = dt[57] = dt[56] = dt[55] =
! 357 ! dt[54] = dt[53] = dt[52] = dt[51] = dt[50] =
! 358 ! dt[49] = dt[48] = dt[47] = dt[46] = dt[45] =
! 359 ! dt[44] = dt[43] = dt[42] = dt[41] = dt[40] =
! 360 ! dt[39] = dt[38] = dt[37] = dt[36] = dt[35] =
! 361 ! dt[34] = dt[33] = dt[32] = dt[31] = dt[30] =
! 362 ! dt[29] = dt[28] = dt[27] = dt[26] = dt[25] =
! 363 ! dt[24] = dt[23] = dt[22] = dt[21] = dt[20] =
! 364 ! dt[19] = dt[18] = dt[17] = dt[16] = dt[15] =
! 365 ! dt[14] = dt[13] = dt[12] = dt[11] = dt[10] =
! 366 ! dt[9] = dt[8] = dt[7] = dt[6] = dt[5] = dt[4] =
! 367 ! dt[3] = dt[2] = dt[1] = Zero;
! 369 ! pdn_0 = pdn[0];
! 370 ! pdm1_0 = pdm1[0];
! 372 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
! 373 ! pdtj = &(dt[0]);
/* 0x0040 373 */ or %g0,%i3,%o3
! 375 ! for (j = 0; j < 32; j++, pdtj++) {
/* 0x0044 375 */ or %g0,0,%l1
/* 0x0048 353 */ ldd [%l0],%f42
/* 0x004c 372 */ ldd [%o2+%lo(TwoToMinus16)],%f44
/* 0x0050 */ ldd [%l3+%lo(TwoTo16)],%f46
/* 0x0054 367 */ std %f0,[%i3+8]
/* 0x0058 353 */ fmuld %f40,%f42,%f38
/* 0x005c */ std %f38,[%i3]
/* 0x0060 367 */ std %f0,[%i3+16]
/* 0x0064 */ std %f0,[%i3+24]
/* 0x0068 */ std %f0,[%i3+32]
/* 0x006c 372 */ fdtox %f38,%f4
/* 0x0070 367 */ std %f0,[%i3+40]
/* 0x0074 */ std %f0,[%i3+48]
/* 0x0078 */ std %f0,[%i3+56]
/* 0x007c 372 */ fmovs %f0,%f4
/* 0x0080 367 */ std %f0,[%i3+64]
/* 0x0084 */ std %f0,[%i3+72]
/* 0x0088 372 */ fxtod %f4,%f52
/* 0x008c 367 */ std %f0,[%i3+80]
/* 0x0090 */ std %f0,[%i3+88]
/* 0x0094 */ std %f0,[%i3+96]
/* 0x0098 */ std %f0,[%i3+104]
/* 0x009c 372 */ fmuld %f52,%f14,%f60
/* 0x00a0 367 */ std %f0,[%i3+112]
/* 0x00a4 */ std %f0,[%i3+120]
/* 0x00a8 */ std %f0,[%i3+128]
/* 0x00ac */ std %f0,[%i3+136]
/* 0x00b0 372 */ fmuld %f60,%f44,%f62
/* 0x00b4 367 */ std %f0,[%i3+144]
/* 0x00b8 */ std %f0,[%i3+152]
/* 0x00bc */ std %f0,[%i3+160]
/* 0x00c0 */ std %f0,[%i3+168]
/* 0x00c4 372 */ fdtox %f62,%f32
/* 0x00c8 367 */ std %f0,[%i3+176]
/* 0x00cc */ std %f0,[%i3+184]
/* 0x00d0 */ std %f0,[%i3+192]
/* 0x00d4 */ std %f0,[%i3+200]
/* 0x00d8 372 */ fxtod %f32,%f50
/* 0x00dc 367 */ std %f0,[%i3+208]
/* 0x00e0 */ std %f0,[%i3+216]
/* 0x00e4 */ std %f0,[%i3+224]
/* 0x00e8 */ std %f0,[%i3+232]
/* 0x00ec 372 */ fmuld %f50,%f46,%f34
/* 0x00f0 367 */ std %f0,[%i3+240]
/* 0x00f4 */ std %f0,[%i3+248]
/* 0x00f8 */ std %f0,[%i3+256]
/* 0x00fc */ std %f0,[%i3+264]
/* 0x0100 372 */ fsubd %f60,%f34,%f40
/* 0x0104 367 */ std %f0,[%i3+272]
/* 0x0108 */ std %f0,[%i3+280]
/* 0x010c */ std %f0,[%i3+288]
/* 0x0110 */ std %f0,[%i3+296]
/* 0x0114 */ std %f0,[%i3+304]
/* 0x0118 */ std %f0,[%i3+312]
/* 0x011c */ std %f0,[%i3+320]
/* 0x0120 */ std %f0,[%i3+328]
/* 0x0124 */ std %f0,[%i3+336]
/* 0x0128 */ std %f0,[%i3+344]
/* 0x012c */ std %f0,[%i3+352]
/* 0x0130 */ std %f0,[%i3+360]
/* 0x0134 */ std %f0,[%i3+368]
/* 0x0138 375 */ sub %g1,1,%l3
/* 0x013c */ add %i3,8,%o7
/* 0x0140 367 */ std %f0,[%i3+376]
/* 0x0144 */ std %f0,[%i3+384]
/* 0x0148 */ std %f0,[%i3+392]
/* 0x014c */ std %f0,[%i3+400]
/* 0x0150 */ std %f0,[%i3+408]
/* 0x0154 */ std %f0,[%i3+416]
/* 0x0158 */ std %f0,[%i3+424]
/* 0x015c */ std %f0,[%i3+432]
/* 0x0160 */ std %f0,[%i3+440]
/* 0x0164 */ std %f0,[%i3+448]
/* 0x0168 */ std %f0,[%i3+456]
/* 0x016c */ std %f0,[%i3+464]
/* 0x0170 */ std %f0,[%i3+472]
/* 0x0174 */ std %f0,[%i3+480]
/* 0x0178 */ std %f0,[%i3+488]
/* 0x017c */ std %f0,[%i3+496]
/* 0x0180 */ std %f0,[%i3+504]
/* 0x0184 */ std %f0,[%i3+512]
/* 0x0188 */ std %f0,[%i3+520]
!BEGIN HAND CODED PART
! cheetah schedule, no even-odd trick
add %i3,%g0,%o5
fmovd %f40,%f0
fmovd %f14,%f2
fmovd %f44,%f8
sethi %hi(TwoTo32),%l5
fmovd %f46,%f10
sethi %hi(TwoToMinus32),%g5
ldd [%i3],%f6
ldd [%l0],%f4
ldd [%i1],%f40
ldd [%i1+8],%f42
ldd [%i1+16],%f52
ldd [%i1+48],%f54
ldd [%i1+56],%f36
ldd [%i1+64],%f56
ldd [%i1+104],%f48
ldd [%i1+112],%f58
ldd [%i4],%f44
ldd [%i4+8],%f46
ldd [%i4+104],%f50
ldd [%i4+112],%f60
.L99999999:
!1
ldd [%i1+24],%f20
fmuld %f0,%f44,%f12
!2
ldd [%i4+24],%f22
fmuld %f42,%f4,%f16
!3
ldd [%i1+40],%f24
fmuld %f46,%f0,%f18
!4
ldd [%i4+40],%f26
fmuld %f20,%f4,%f20
!5
ldd [%l0+8],%f38
faddd %f12,%f6,%f12
fmuld %f22,%f0,%f22
!6
add %l0,8,%l0
ldd [%i4+56],%f30
fmuld %f24,%f4,%f24
!7
ldd [%i1+72],%f32
faddd %f16,%f18,%f16
fmuld %f26,%f0,%f26
!8
ldd [%i3+16],%f18
fmuld %f40,%f38,%f14
!9
ldd [%i4+72],%f34
faddd %f20,%f22,%f20
fmuld %f8,%f12,%f12
!10
ldd [%i3+48],%f22
fmuld %f36,%f4,%f28
!11
ldd [%i3+8],%f6
faddd %f16,%f18,%f16
fmuld %f30,%f0,%f30
!12
std %f16,[%i3+16]
faddd %f24,%f26,%f24
fmuld %f32,%f4,%f32
!13
ldd [%i3+80],%f26
faddd %f12,%f14,%f12
fmuld %f34,%f0,%f34
!14
ldd [%i1+88],%f16
faddd %f20,%f22,%f20
!15
ldd [%i4+88],%f18
faddd %f28,%f30,%f28
!16
ldd [%i3+112],%f30
faddd %f32,%f34,%f32
!17
ldd [%i3+144],%f34
faddd %f12,%f6,%f6
fmuld %f16,%f4,%f16
!18
std %f20,[%i3+48]
faddd %f24,%f26,%f24
fmuld %f18,%f0,%f18
!19
std %f24,[%i3+80]
faddd %f28,%f30,%f28
fmuld %f48,%f4,%f20
!20
std %f28,[%i3+112]
faddd %f32,%f34,%f32
fmuld %f50,%f0,%f22
!21
ldd [%i1+120],%f24
fdtox %f6,%f12
!22
std %f32,[%i3+144]
faddd %f16,%f18,%f16
!23
ldd [%i4+120],%f26
!24
ldd [%i3+176],%f18
faddd %f20,%f22,%f20
fmuld %f24,%f4,%f24
!25
ldd [%i4+16],%f30
fmovs %f11,%f12
!26
ldd [%i1+32],%f32
fmuld %f26,%f0,%f26
!27
ldd [%i4+32],%f34
fmuld %f52,%f4,%f28
!28
ldd [%i3+208],%f22
faddd %f16,%f18,%f16
fmuld %f30,%f0,%f30
!29
std %f16,[%i3+176]
fxtod %f12,%f12
fmuld %f32,%f4,%f32
!30
ldd [%i4+48],%f18
faddd %f24,%f26,%f24
fmuld %f34,%f0,%f34
!31
ldd [%i3+240],%f26
faddd %f20,%f22,%f20
!32
std %f20,[%i3+208]
faddd %f28,%f30,%f28
fmuld %f54,%f4,%f16
!33
ldd [%i3+32],%f30
fmuld %f12,%f2,%f14
!34
ldd [%i4+64],%f22
faddd %f32,%f34,%f32
fmuld %f18,%f0,%f18
!35
ldd [%i3+64],%f34
faddd %f24,%f26,%f24
!36
std %f24,[%i3+240]
faddd %f28,%f30,%f28
fmuld %f56,%f4,%f20
!37
std %f28,[%i3+32]
fmuld %f14,%f8,%f12
!38
ldd [%i1+80],%f24
faddd %f32,%f34,%f34 ! yes, tmp52!
fmuld %f22,%f0,%f22
!39
ldd [%i4+80],%f26
faddd %f16,%f18,%f16
!40
ldd [%i1+96],%f28
fmuld %f58,%f4,%f32
!41
ldd [%i4+96],%f30
fdtox %f12,%f12
fmuld %f24,%f4,%f24
!42
std %f34,[%i3+64] ! yes, tmp52!
faddd %f20,%f22,%f20
fmuld %f26,%f0,%f26
!43
ldd [%i3+96],%f18
fmuld %f28,%f4,%f28
!44
ldd [%i3+128],%f22
fmovd %f38,%f4
fmuld %f30,%f0,%f30
!45
fxtod %f12,%f12
fmuld %f60,%f0,%f34
!46
add %i3,8,%i3
faddd %f24,%f26,%f24
!47
ldd [%i3+160-8],%f26
faddd %f16,%f18,%f16
!48
std %f16,[%i3+96-8]
faddd %f28,%f30,%f28
!49
ldd [%i3+192-8],%f30
faddd %f32,%f34,%f32
fmuld %f12,%f10,%f12
!50
ldd [%i3+224-8],%f34
faddd %f20,%f22,%f20
!51
std %f20,[%i3+128-8]
faddd %f24,%f26,%f24
!52
add %l1,1,%l1
std %f24,[%i3+160-8]
faddd %f28,%f30,%f28
!53
cmp %l1,15
std %f28,[%i3+192-8]
fsubd %f14,%f12,%f0
!54
faddd %f32,%f34,%f32
ble,pt %icc,.L99999999
std %f32,[%i3+224-8]
!
ldd [%g5+%lo(TwoToMinus32)],%f8
!
ldd [%i3+8],%f16
!
ldd [%i3+16],%f20
!
fmuld %f8,%f16,%f18
ldd [%i3+24],%f24
!
fmuld %f8,%f20,%f22
ldd [%i3+32],%f28
!
fmuld %f8,%f24,%f26
ldd [%l5+%lo(TwoTo32)],%f10
!
fmuld %f8,%f28,%f30
!
fdtox %f18,%f18
!
fdtox %f22,%f22
!
fdtox %f26,%f26
ldd [%i3+40],%f32
!
fdtox %f30,%f30
ldd [%i3+48],%f56
!
fxtod %f18,%f18
fmuld %f8,%f32,%f34
ldd [%i3+56],%f36
!
fxtod %f22,%f22
fmuld %f8,%f56,%f58
ldd [%i3+64],%f38
!
fxtod %f26,%f26
fmuld %f8,%f36,%f60
!
fxtod %f30,%f30
fmuld %f8,%f38,%f62
!
fdtox %f34,%f34
fmuld %f10,%f18,%f40
!
fdtox %f58,%f58
fmuld %f10,%f22,%f42
!
fdtox %f60,%f60
fmuld %f10,%f26,%f44
!
fdtox %f62,%f62
fmuld %f10,%f30,%f46
!
fxtod %f34,%f34
!
fxtod %f58,%f58
!
fxtod %f60,%f60
!
fxtod %f62,%f62
!
fsubd %f16,%f40,%f40
fmuld %f10,%f34,%f48
!
fsubd %f20,%f42,%f42
fmuld %f10,%f58,%f50
!
fsubd %f24,%f44,%f44
fmuld %f10,%f60,%f52
!
fsubd %f28,%f46,%f46
fmuld %f10,%f62,%f54
!
std %f40,[%i3+8]
!
std %f42,[%i3+16]
!
faddd %f18,%f44,%f44
std %f44,[%i3+24]
!
faddd %f22,%f46,%f46
std %f46,[%i3+32]
!
fsubd %f32,%f48,%f48
ldd [%i3+64+8],%f16
!
fsubd %f56,%f50,%f50
ldd [%i3+64+16],%f20
!
fsubd %f36,%f52,%f52
ldd [%i3+64+24],%f24
!
fsubd %f38,%f54,%f54
ldd [%i3+64+32],%f28
!
faddd %f26,%f48,%f48
fmuld %f8,%f16,%f18
std %f48,[%i3+40]
!
faddd %f30,%f50,%f50
fmuld %f8,%f20,%f22
std %f50,[%i3+48]
!
faddd %f34,%f52,%f52
fmuld %f8,%f24,%f26
std %f52,[%i3+56]
!
faddd %f58,%f54,%f54
fmuld %f8,%f28,%f30
std %f54,[%i3+64]
!
fdtox %f18,%f18
!
fdtox %f22,%f22
!
fdtox %f26,%f26
ldd [%i3+64+40],%f32
!
fdtox %f30,%f30
ldd [%i3+64+48],%f56
!
fxtod %f18,%f18
fmuld %f8,%f32,%f34
ldd [%i3+64+56],%f36
!
fxtod %f22,%f22
fmuld %f8,%f56,%f58
ldd [%i3+64+64],%f38
!
fxtod %f26,%f26
fmuld %f8,%f36,%f12
!
fxtod %f30,%f30
fmuld %f8,%f38,%f14
!
fdtox %f34,%f34
fmuld %f10,%f18,%f40
!
fdtox %f58,%f58
fmuld %f10,%f22,%f42
!
fdtox %f12,%f12
fmuld %f10,%f26,%f44
!
fdtox %f14,%f14
fmuld %f10,%f30,%f46
!
fxtod %f34,%f34
!
fxtod %f58,%f58
!
fxtod %f12,%f12
!
fxtod %f14,%f14
!
fsubd %f16,%f40,%f40
fmuld %f10,%f34,%f48
!
fsubd %f20,%f42,%f42
fmuld %f10,%f58,%f50
!
fsubd %f24,%f44,%f44
fmuld %f10,%f12,%f52
!
fsubd %f28,%f46,%f46
fmuld %f10,%f14,%f54
!
faddd %f60,%f40,%f40
std %f40,[%i3+64+8]
!
faddd %f62,%f42,%f42
std %f42,[%i3+64+16]
!
faddd %f18,%f44,%f44
std %f44,[%i3+64+24]
!
faddd %f22,%f46,%f46
std %f46,[%i3+64+32]
!
fsubd %f32,%f48,%f48
ldd [%i3+64+64+8],%f16
!
fsubd %f56,%f50,%f50
ldd [%i3+64+64+16],%f20
!
fsubd %f36,%f52,%f52
ldd [%i3+64+64+24],%f24
!
fsubd %f38,%f54,%f54
ldd [%i3+64+64+32],%f28
!
faddd %f26,%f48,%f48
fmuld %f8,%f16,%f18
std %f48,[%i3+64+40]
!
faddd %f30,%f50,%f50
fmuld %f8,%f20,%f22
std %f50,[%i3+64+48]
!
faddd %f34,%f52,%f52
fmuld %f8,%f24,%f26
std %f52,[%i3+64+56]
!
faddd %f58,%f54,%f54
fmuld %f8,%f28,%f30
std %f54,[%i3+64+64]
!
fdtox %f18,%f18
!
fdtox %f22,%f22
!
fdtox %f26,%f26
ldd [%i3+64+64+40],%f32
!
fdtox %f30,%f30
ldd [%i3+64+64+48],%f56
!
fxtod %f18,%f18
fmuld %f8,%f32,%f34
ldd [%i3+64+64+56],%f36
!
fxtod %f22,%f22
fmuld %f8,%f56,%f58
ldd [%i3+64+64+64],%f38
!
fxtod %f26,%f26
fmuld %f8,%f36,%f60
!
fxtod %f30,%f30
fmuld %f8,%f38,%f62
!
fdtox %f34,%f34
fmuld %f10,%f18,%f40
!
fdtox %f58,%f58
fmuld %f10,%f22,%f42
!
fdtox %f60,%f60
fmuld %f10,%f26,%f44
!
fdtox %f62,%f62
fmuld %f10,%f30,%f46
!
fxtod %f34,%f34
!
fxtod %f58,%f58
!
fxtod %f60,%f60
!
fxtod %f62,%f62
!
fsubd %f16,%f40,%f40
fmuld %f10,%f34,%f48
!
fsubd %f20,%f42,%f42
fmuld %f10,%f58,%f50
!
fsubd %f24,%f44,%f44
fmuld %f10,%f60,%f52
!
fsubd %f28,%f46,%f46
fmuld %f10,%f62,%f54
!
faddd %f12,%f40,%f40
std %f40,[%i3+64+64+8]
!
faddd %f14,%f42,%f42
std %f42,[%i3+64+64+16]
!
faddd %f18,%f44,%f44
std %f44,[%i3+64+64+24]
!
faddd %f22,%f46,%f46
std %f46,[%i3+64+64+32]
!
fsubd %f32,%f48,%f48
ldd [%i3+64+64+64+8],%f16
!
fsubd %f56,%f50,%f50
ldd [%i3+64+64+64+16],%f20
!
fsubd %f36,%f52,%f52
ldd [%i3+64+64+64+24],%f24
!
fsubd %f38,%f54,%f54
ldd [%i3+64+64+64+32],%f28
!
faddd %f26,%f48,%f48
fmuld %f8,%f16,%f18
std %f48,[%i3+64+64+40]
!
faddd %f30,%f50,%f50
fmuld %f8,%f20,%f22
std %f50,[%i3+64+64+48]
!
faddd %f34,%f52,%f52
fmuld %f8,%f24,%f26
std %f52,[%i3+64+64+56]
!
faddd %f58,%f54,%f54
fmuld %f8,%f28,%f30
std %f54,[%i3+64+64+64]
!
fdtox %f18,%f18
!
fdtox %f22,%f22
!
fdtox %f26,%f26
ldd [%i3+64+64+64+40],%f32
!
fdtox %f30,%f30
ldd [%i3+64+64+64+48],%f56
!
fxtod %f18,%f18
fmuld %f8,%f32,%f34
ldd [%i3+64+64+64+56],%f36
!
fxtod %f22,%f22
fmuld %f8,%f56,%f58
ldd [%i3+64+64+64+64],%f38
!
fxtod %f26,%f26
fmuld %f8,%f36,%f12
!
fxtod %f30,%f30
fmuld %f8,%f38,%f14
!
fdtox %f34,%f34
fmuld %f10,%f18,%f40
!
fdtox %f58,%f58
fmuld %f10,%f22,%f42
!
fdtox %f12,%f12
fmuld %f10,%f26,%f44
!
fdtox %f14,%f14
fmuld %f10,%f30,%f46
!
sethi %hi(TwoToMinus16),%g5
fxtod %f34,%f34
!
sethi %hi(TwoTo16),%l5
fxtod %f58,%f58
!
fxtod %f12,%f12
!
fxtod %f14,%f14
!
fsubd %f16,%f40,%f16
fmuld %f10,%f34,%f48
ldd [%g5+%lo(TwoToMinus16)],%f8
!
fsubd %f20,%f42,%f20
fmuld %f10,%f58,%f50
ldd [%i1],%f40 ! should be %f40
!
fsubd %f24,%f44,%f24
fmuld %f10,%f12,%f52
ldd [%i1+8],%f42 ! should be %f42
!
fsubd %f28,%f46,%f28
fmuld %f10,%f14,%f54
ldd [%i4],%f44 ! should be %f44
!
faddd %f60,%f16,%f16
std %f16,[%i3+64+64+64+8]
!
faddd %f62,%f20,%f20
std %f20,[%i3+64+64+64+16]
!
faddd %f18,%f24,%f24
std %f24,[%i3+64+64+64+24]
!
faddd %f22,%f28,%f28
std %f28,[%i3+64+64+64+32]
!
fsubd %f32,%f48,%f32
ldd [%i4+8],%f46 ! should be %f46
!
fsubd %f56,%f50,%f56
ldd [%i1+104],%f48 ! should be %f48
!
fsubd %f36,%f52,%f36
ldd [%i4+104],%f50 ! should be %f50
!
fsubd %f38,%f54,%f38
ldd [%i1+16],%f52 ! should be %f52
!
faddd %f26,%f32,%f32
std %f32,[%i3+64+64+64+40]
!
faddd %f30,%f56,%f56
std %f56,[%i3+64+64+64+48]
!
faddd %f34,%f36,%f36
std %f36,[%i3+64+64+64+56]
!
faddd %f58,%f38,%f38
std %f38,[%i3+64+64+64+64]
!
std %f12,[%i3+64+64+64+64+8]
!
std %f14,[%i3+64+64+64+64+16]
!
ldd [%l5+%lo(TwoTo16)],%f10
ldd [%i1+48],%f54
ldd [%i1+56],%f36
ldd [%i1+64],%f56
ldd [%i1+112],%f58
ldd [%i4+104],%f50
ldd [%i4+112],%f60
.L99999998:
!1
ldd [%i1+24],%f20
fmuld %f0,%f44,%f12
!2
ldd [%i4+24],%f22
fmuld %f42,%f4,%f16
!3
ldd [%i1+40],%f24
fmuld %f46,%f0,%f18
!4
ldd [%i4+40],%f26
fmuld %f20,%f4,%f20
!5
ldd [%l0+8],%f38
faddd %f12,%f6,%f12
fmuld %f22,%f0,%f22
!6
add %l0,8,%l0
ldd [%i4+56],%f30
fmuld %f24,%f4,%f24
!7
ldd [%i1+72],%f32
faddd %f16,%f18,%f16
fmuld %f26,%f0,%f26
!8
ldd [%i3+16],%f18
fmuld %f40,%f38,%f14
!9
ldd [%i4+72],%f34
faddd %f20,%f22,%f20
fmuld %f8,%f12,%f12
!10
ldd [%i3+48],%f22
fmuld %f36,%f4,%f28
!11
ldd [%i3+8],%f6
faddd %f16,%f18,%f16
fmuld %f30,%f0,%f30
!12
std %f16,[%i3+16]
faddd %f24,%f26,%f24
fmuld %f32,%f4,%f32
!13
ldd [%i3+80],%f26
faddd %f12,%f14,%f12
fmuld %f34,%f0,%f34
!14
ldd [%i1+88],%f16
faddd %f20,%f22,%f20
!15
ldd [%i4+88],%f18
faddd %f28,%f30,%f28
!16
ldd [%i3+112],%f30
faddd %f32,%f34,%f32
!17
ldd [%i3+144],%f34
faddd %f12,%f6,%f6
fmuld %f16,%f4,%f16
!18
std %f20,[%i3+48]
faddd %f24,%f26,%f24
fmuld %f18,%f0,%f18
!19
std %f24,[%i3+80]
faddd %f28,%f30,%f28
fmuld %f48,%f4,%f20
!20
std %f28,[%i3+112]
faddd %f32,%f34,%f32
fmuld %f50,%f0,%f22
!21
ldd [%i1+120],%f24
fdtox %f6,%f12
!22
std %f32,[%i3+144]
faddd %f16,%f18,%f16
!23
ldd [%i4+120],%f26
!24
ldd [%i3+176],%f18
faddd %f20,%f22,%f20
fmuld %f24,%f4,%f24
!25
ldd [%i4+16],%f30
fmovs %f11,%f12
!26
ldd [%i1+32],%f32
fmuld %f26,%f0,%f26
!27
ldd [%i4+32],%f34
fmuld %f52,%f4,%f28
!28
ldd [%i3+208],%f22
faddd %f16,%f18,%f16
fmuld %f30,%f0,%f30
!29
std %f16,[%i3+176]
fxtod %f12,%f12
fmuld %f32,%f4,%f32
!30
ldd [%i4+48],%f18
faddd %f24,%f26,%f24
fmuld %f34,%f0,%f34
!31
ldd [%i3+240],%f26
faddd %f20,%f22,%f20
!32
std %f20,[%i3+208]
faddd %f28,%f30,%f28
fmuld %f54,%f4,%f16
!33
ldd [%i3+32],%f30
fmuld %f12,%f2,%f14
!34
ldd [%i4+64],%f22
faddd %f32,%f34,%f32
fmuld %f18,%f0,%f18
!35
ldd [%i3+64],%f34
faddd %f24,%f26,%f24
!36
std %f24,[%i3+240]
faddd %f28,%f30,%f28
fmuld %f56,%f4,%f20
!37
std %f28,[%i3+32]
fmuld %f14,%f8,%f12
!38
ldd [%i1+80],%f24
faddd %f32,%f34,%f34 ! yes, tmp52!
fmuld %f22,%f0,%f22
!39
ldd [%i4+80],%f26
faddd %f16,%f18,%f16
!40
ldd [%i1+96],%f28
fmuld %f58,%f4,%f32
!41
ldd [%i4+96],%f30
fdtox %f12,%f12
fmuld %f24,%f4,%f24
!42
std %f34,[%i3+64] ! yes, tmp52!
faddd %f20,%f22,%f20
fmuld %f26,%f0,%f26
!43
ldd [%i3+96],%f18
fmuld %f28,%f4,%f28
!44
ldd [%i3+128],%f22
fmovd %f38,%f4
fmuld %f30,%f0,%f30
!45
fxtod %f12,%f12
fmuld %f60,%f0,%f34
!46
add %i3,8,%i3
faddd %f24,%f26,%f24
!47
ldd [%i3+160-8],%f26
faddd %f16,%f18,%f16
!48
std %f16,[%i3+96-8]
faddd %f28,%f30,%f28
!49
ldd [%i3+192-8],%f30
faddd %f32,%f34,%f32
fmuld %f12,%f10,%f12
!50
ldd [%i3+224-8],%f34
faddd %f20,%f22,%f20
!51
std %f20,[%i3+128-8]
faddd %f24,%f26,%f24
!52
add %l1,1,%l1
std %f24,[%i3+160-8]
faddd %f28,%f30,%f28
!53
cmp %l1,31
std %f28,[%i3+192-8]
fsubd %f14,%f12,%f0
!54
faddd %f32,%f34,%f32
ble,pt %icc,.L99999998
std %f32,[%i3+224-8]
!55
std %f6,[%i3]
add %o5,%g0,%i3
!END HAND CODED PART
.L900000828:
/* 0x03e4 405 */ ba .L900000852
/* 0x03e8 409 */ ldx [%i3+%o0],%l1
! 406 ! }
! 407 ! }
! 409 ! conv_d16_to_i32(result, dt + 2 * nlen, (int64_t *)dt, nlen + 1);
! 411 !/*for(i=0;i<nlen+1;i++) saveresult[i]=result[i];*/
! 413 ! adjust_montf_result(result, nint, nlen);
.L77000476:
/* 0x03ec 413 */ sll %g1,2,%l3
/* 0x03f0 0 */ sethi %hi(TwoTo16),%g5
/* 0x03f4 413 */ add %l3,2,%l2
/* 0x03f8 328 */ cmp %l2,0
/* 0x03fc */ ble,pn %icc,.L77000482
/* 0x0400 0 */ sethi %hi(TwoToMinus16),%o2
.L77000514:
/* 0x0404 329 */ add %l3,2,%l2
/* 0x0408 328 */ add %l3,1,%o4
/* 0x040c */ or %g0,0,%l3
/* 0x0410 329 */ cmp %l2,8
/* 0x0414 */ bl,pn %icc,.L77000477
/* 0x0418 328 */ or %g0,%i3,%l1
.L900000831:
/* 0x041c 329 */ prefetch [%i3],22
/* 0x0420 */ sub %o4,7,%l4
/* 0x0424 */ or %g0,0,%l3
/* 0x0428 */ or %g0,%i3,%l1
.L900000829:
/* 0x042c 329 */ prefetch [%l1+528],22
/* 0x0430 */ std %f0,[%l1]
/* 0x0434 */ add %l3,8,%l3
/* 0x0438 */ add %l1,64,%l1
/* 0x043c */ std %f0,[%l1-56]
/* 0x0440 */ cmp %l3,%l4
/* 0x0444 */ std %f0,[%l1-48]
/* 0x0448 */ std %f0,[%l1-40]
/* 0x044c */ prefetch [%l1+496],22
/* 0x0450 */ std %f0,[%l1-32]
/* 0x0454 */ std %f0,[%l1-24]
/* 0x0458 */ std %f0,[%l1-16]
/* 0x045c */ ble,pt %icc,.L900000829
/* 0x0460 */ std %f0,[%l1-8]
.L900000832:
/* 0x0464 329 */ cmp %l3,%o4
/* 0x0468 */ bg,pn %icc,.L77000482
/* 0x046c */ nop
.L77000477:
/* 0x0470 329 */ add %l3,1,%l3
.L900000851:
/* 0x0474 329 */ std %f0,[%l1]
/* 0x0478 */ cmp %l3,%o4
/* 0x047c */ add %l1,8,%l1
/* 0x0480 */ ble,pt %icc,.L900000851
/* 0x0484 */ add %l3,1,%l3
.L77000482:
/* 0x0488 330 */ ldd [%i1],%f40
/* 0x048c 334 */ cmp %o3,0
/* 0x0490 */ sub %g1,1,%l3
/* 0x0494 330 */ ldd [%l0],%f42
/* 0x0498 331 */ ldd [%o2+%lo(TwoToMinus16)],%f36
/* 0x049c */ ldd [%g5+%lo(TwoTo16)],%f38
/* 0x04a0 330 */ fmuld %f40,%f42,%f52
/* 0x04a4 331 */ fdtox %f52,%f8
/* 0x04a8 */ fmovs %f0,%f8
/* 0x04ac */ fxtod %f8,%f62
/* 0x04b0 */ fmuld %f62,%f14,%f60
/* 0x04b4 */ fmuld %f60,%f36,%f32
/* 0x04b8 */ fdtox %f32,%f50
/* 0x04bc */ fxtod %f50,%f34
/* 0x04c0 */ fmuld %f34,%f38,%f46
/* 0x04c4 */ fsubd %f60,%f46,%f40
/* 0x04c8 334 */ ble,pn %icc,.L77000378
/* 0x04cc 330 */ std %f52,[%i3]
.L77000509:
/* 0x04d0 345 */ add %o3,1,%g5
/* 0x04d4 */ sll %g5,1,%o2
/* 0x04d8 */ or %g0,0,%l1
/* 0x04dc 337 */ ldd [%i4],%f42
/* 0x04e0 345 */ sub %o3,1,%o3
/* 0x04e4 */ or %g0,0,%o5
/* 0x04e8 */ or %g0,%i3,%l2
/* 0x04ec */ add %i4,8,%o1
/* 0x04f0 */ add %i1,8,%g5
.L900000848:
/* 0x04f4 337 */ fmuld %f40,%f42,%f34
/* 0x04f8 */ ldd [%l0+8],%f32
/* 0x04fc 341 */ cmp %g1,1
/* 0x0500 337 */ ldd [%i1],%f50
/* 0x0504 */ ldd [%l2],%f46
/* 0x0508 */ ldd [%l2+8],%f44
/* 0x050c */ fmuld %f50,%f32,%f60
/* 0x0510 335 */ ldd [%l0],%f42
/* 0x0514 337 */ faddd %f46,%f34,%f48
/* 0x0518 */ faddd %f44,%f60,%f58
/* 0x051c */ fmuld %f36,%f48,%f54
/* 0x0520 */ faddd %f58,%f54,%f34
/* 0x0524 341 */ ble,pn %icc,.L77000368
/* 0x0528 338 */ std %f34,[%l2+8]
.L77000507:
/* 0x052c 341 */ or %g0,1,%l5
/* 0x0530 */ or %g0,2,%l4
/* 0x0534 */ or %g0,%g5,%g4
/* 0x0538 342 */ cmp %l3,12
/* 0x053c */ bl,pn %icc,.L77000481
/* 0x0540 341 */ or %g0,%o1,%g3
.L900000839:
/* 0x0544 342 */ prefetch [%i1+8],0
/* 0x0548 */ prefetch [%i1+72],0
/* 0x054c */ add %i4,40,%l6
/* 0x0550 */ add %i1,40,%l7
/* 0x0554 */ prefetch [%l2+16],0
/* 0x0558 */ or %g0,%l2,%o7
/* 0x055c */ sub %l3,7,%i5
/* 0x0560 */ prefetch [%l2+80],0
/* 0x0564 */ add %l2,80,%g2
/* 0x0568 */ or %g0,2,%l4
/* 0x056c */ prefetch [%i1+136],0
/* 0x0570 */ or %g0,5,%l5
/* 0x0574 */ prefetch [%i1+200],0
/* 0x0578 */ prefetch [%l2+144],0
/* 0x057c */ ldd [%i4+8],%f52
/* 0x0580 */ ldd [%i4+16],%f44
/* 0x0584 */ ldd [%i4+24],%f56
/* 0x0588 */ fmuld %f40,%f52,%f48
/* 0x058c */ fmuld %f40,%f44,%f46
/* 0x0590 */ fmuld %f40,%f56,%f44
/* 0x0594 */ ldd [%l2+48],%f56
/* 0x0598 */ prefetch [%l2+208],0
/* 0x059c */ prefetch [%l2+272],0
/* 0x05a0 */ prefetch [%l2+336],0
/* 0x05a4 */ prefetch [%l2+400],0
/* 0x05a8 */ ldd [%i1+8],%f32
/* 0x05ac */ ldd [%i1+16],%f60
/* 0x05b0 */ ldd [%i1+24],%f50
/* 0x05b4 */ fmuld %f42,%f32,%f62
/* 0x05b8 */ ldd [%i1+32],%f32
/* 0x05bc */ fmuld %f42,%f60,%f58
/* 0x05c0 */ ldd [%l2+16],%f52
/* 0x05c4 */ ldd [%l2+32],%f54
/* 0x05c8 */ faddd %f62,%f48,%f60
/* 0x05cc */ fmuld %f42,%f50,%f48
/* 0x05d0 */ faddd %f58,%f46,%f62
/* 0x05d4 */ ldd [%i4+32],%f46
/* 0x05d8 */ ldd [%l2+64],%f58
.L900000837:
/* 0x05dc 342 */ prefetch [%l7+192],0
/* 0x05e0 */ fmuld %f40,%f46,%f46
/* 0x05e4 */ faddd %f60,%f52,%f60
/* 0x05e8 */ ldd [%l6],%f52
/* 0x05ec */ std %f60,[%g2-64]
/* 0x05f0 */ fmuld %f42,%f32,%f50
/* 0x05f4 */ add %l5,8,%l5
/* 0x05f8 */ ldd [%l7],%f60
/* 0x05fc */ faddd %f48,%f44,%f48
/* 0x0600 */ cmp %l5,%i5
/* 0x0604 */ ldd [%g2],%f32
/* 0x0608 */ add %g2,128,%g2
/* 0x060c */ prefetch [%g2+256],0
/* 0x0610 */ fmuld %f40,%f52,%f52
/* 0x0614 */ faddd %f62,%f54,%f44
/* 0x0618 */ ldd [%l6+8],%f54
/* 0x061c */ std %f44,[%g2-176]
/* 0x0620 */ fmuld %f42,%f60,%f44
/* 0x0624 */ add %l6,64,%l6
/* 0x0628 */ ldd [%l7+8],%f60
/* 0x062c */ faddd %f50,%f46,%f50
/* 0x0630 */ add %l7,64,%l7
/* 0x0634 */ add %l4,16,%l4
/* 0x0638 */ ldd [%g2-112],%f46
/* 0x063c */ fmuld %f40,%f54,%f54
/* 0x0640 */ faddd %f48,%f56,%f62
/* 0x0644 */ ldd [%l6-48],%f56
/* 0x0648 */ std %f62,[%g2-160]
/* 0x064c */ fmuld %f42,%f60,%f48
/* 0x0650 */ ldd [%l7-48],%f60
/* 0x0654 */ faddd %f44,%f52,%f52
/* 0x0658 */ ldd [%g2-96],%f30
/* 0x065c */ prefetch [%g2+288],0
/* 0x0660 */ fmuld %f40,%f56,%f56
/* 0x0664 */ faddd %f50,%f58,%f62
/* 0x0668 */ ldd [%l6-40],%f58
/* 0x066c */ std %f62,[%g2-144]
/* 0x0670 */ fmuld %f42,%f60,%f50
/* 0x0674 */ ldd [%l7-40],%f62
/* 0x0678 */ faddd %f48,%f54,%f54
/* 0x067c */ ldd [%g2-80],%f28
/* 0x0680 */ prefetch [%l7+160],0
/* 0x0684 */ fmuld %f40,%f58,%f48
/* 0x0688 */ faddd %f52,%f32,%f44
/* 0x068c */ ldd [%l6-32],%f58
/* 0x0690 */ std %f44,[%g2-128]
/* 0x0694 */ fmuld %f42,%f62,%f44
/* 0x0698 */ ldd [%l7-32],%f60
/* 0x069c */ faddd %f50,%f56,%f56
/* 0x06a0 */ ldd [%g2-64],%f52
/* 0x06a4 */ prefetch [%g2+320],0
/* 0x06a8 */ fmuld %f40,%f58,%f50
/* 0x06ac */ faddd %f54,%f46,%f32
/* 0x06b0 */ ldd [%l6-24],%f62
/* 0x06b4 */ std %f32,[%g2-112]
/* 0x06b8 */ fmuld %f42,%f60,%f46
/* 0x06bc */ ldd [%l7-24],%f60
/* 0x06c0 */ faddd %f44,%f48,%f48
/* 0x06c4 */ ldd [%g2-48],%f54
/* 0x06c8 */ fmuld %f40,%f62,%f26
/* 0x06cc */ faddd %f56,%f30,%f32
/* 0x06d0 */ ldd [%l6-16],%f58
/* 0x06d4 */ std %f32,[%g2-96]
/* 0x06d8 */ fmuld %f42,%f60,%f30
/* 0x06dc */ ldd [%l7-16],%f32
/* 0x06e0 */ faddd %f46,%f50,%f60
/* 0x06e4 */ ldd [%g2-32],%f56
/* 0x06e8 */ prefetch [%g2+352],0
/* 0x06ec */ fmuld %f40,%f58,%f44
/* 0x06f0 */ faddd %f48,%f28,%f62
/* 0x06f4 */ ldd [%l6-8],%f46
/* 0x06f8 */ std %f62,[%g2-80]
/* 0x06fc */ fmuld %f42,%f32,%f48
/* 0x0700 */ ldd [%l7-8],%f32
/* 0x0704 */ faddd %f30,%f26,%f62
/* 0x0708 */ ble,pt %icc,.L900000837
/* 0x070c */ ldd [%g2-16],%f58
.L900000840:
/* 0x0710 342 */ fmuld %f40,%f46,%f46
/* 0x0714 */ faddd %f62,%f54,%f62
/* 0x0718 */ std %f62,[%g2-48]
/* 0x071c */ cmp %l5,%l3
/* 0x0720 */ fmuld %f42,%f32,%f50
/* 0x0724 */ faddd %f48,%f44,%f48
/* 0x0728 */ or %g0,%l7,%g4
/* 0x072c */ or %g0,%l6,%g3
/* 0x0730 */ faddd %f60,%f52,%f60
/* 0x0734 */ std %f60,[%g2-64]
/* 0x0738 */ or %g0,%o7,%l2
/* 0x073c */ add %l4,8,%l4
/* 0x0740 */ faddd %f50,%f46,%f54
/* 0x0744 */ faddd %f48,%f56,%f56
/* 0x0748 */ std %f56,[%g2-32]
/* 0x074c */ faddd %f54,%f58,%f58
/* 0x0750 */ bg,pn %icc,.L77000368
/* 0x0754 */ std %f58,[%g2-16]
.L77000481:
/* 0x0758 342 */ ldd [%g4],%f44
.L900000850:
/* 0x075c 342 */ ldd [%g3],%f48
/* 0x0760 */ fmuld %f42,%f44,%f58
/* 0x0764 */ sra %l4,0,%l7
/* 0x0768 */ add %l5,1,%l5
/* 0x076c */ sllx %l7,3,%g2
/* 0x0770 */ add %g4,8,%g4
/* 0x0774 */ ldd [%l2+%g2],%f56
/* 0x0778 */ cmp %l5,%l3
/* 0x077c */ add %l4,2,%l4
/* 0x0780 */ fmuld %f40,%f48,%f54
/* 0x0784 */ add %g3,8,%g3
/* 0x0788 */ faddd %f58,%f54,%f52
/* 0x078c */ faddd %f52,%f56,%f62
/* 0x0790 */ std %f62,[%l2+%g2]
/* 0x0794 */ ble,a,pt %icc,.L900000850
/* 0x0798 */ ldd [%g4],%f44
.L77000368:
/* 0x079c 344 */ cmp %o5,15
/* 0x07a0 */ bne,pn %icc,.L77000483
/* 0x07a4 345 */ srl %l1,31,%g4
.L77000478:
/* 0x07a8 345 */ add %l1,%g4,%l4
/* 0x07ac */ sra %l4,1,%o7
/* 0x07b0 */ add %o7,1,%o4
/* 0x07b4 */ sll %o4,1,%l6
/* 0x07b8 */ cmp %l6,%o2
/* 0x07bc */ bge,pn %icc,.L77000392
/* 0x07c0 */ fmovd %f0,%f42
.L77000508:
/* 0x07c4 345 */ sra %l6,0,%l4
/* 0x07c8 */ sllx %l4,3,%g2
/* 0x07cc */ fmovd %f0,%f32
/* 0x07d0 */ sub %o2,1,%l5
/* 0x07d4 */ ldd [%g2+%i3],%f40
/* 0x07d8 */ add %g2,%i3,%g3
.L900000849:
/* 0x07dc 345 */ fdtox %f40,%f10
/* 0x07e0 */ ldd [%g3+8],%f52
/* 0x07e4 */ add %l6,2,%l6
/* 0x07e8 */ cmp %l6,%l5
/* 0x07ec */ fdtox %f52,%f2
/* 0x07f0 */ fmovd %f10,%f30
/* 0x07f4 */ fmovs %f0,%f10
/* 0x07f8 */ fmovs %f0,%f2
/* 0x07fc */ fxtod %f10,%f10
/* 0x0800 */ fxtod %f2,%f2
/* 0x0804 */ fdtox %f52,%f28
/* 0x0808 */ faddd %f10,%f32,%f56
/* 0x080c */ std %f56,[%g3]
/* 0x0810 */ faddd %f2,%f42,%f62
/* 0x0814 */ std %f62,[%g3+8]
/* 0x0818 */ fitod %f30,%f32
/* 0x081c */ add %g3,16,%g3
/* 0x0820 */ fitod %f28,%f42
/* 0x0824 */ ble,a,pt %icc,.L900000849
/* 0x0828 */ ldd [%g3],%f40
.L77000392:
/* 0x082c 346 */ or %g0,0,%o5
.L77000483:
/* 0x0830 350 */ fdtox %f34,%f6
/* 0x0834 */ add %l1,1,%l1
/* 0x0838 */ cmp %l1,%o3
/* 0x083c */ add %o5,1,%o5
/* 0x0840 */ add %l2,8,%l2
/* 0x0844 */ add %l0,8,%l0
/* 0x0848 */ fmovs %f0,%f6
/* 0x084c */ fxtod %f6,%f46
/* 0x0850 */ fmuld %f46,%f14,%f56
/* 0x0854 */ fmuld %f56,%f36,%f44
/* 0x0858 */ fdtox %f44,%f48
/* 0x085c */ fxtod %f48,%f58
/* 0x0860 */ fmuld %f58,%f38,%f54
/* 0x0864 */ fsubd %f56,%f54,%f40
/* 0x0868 */ ble,a,pt %icc,.L900000848
/* 0x086c 337 */ ldd [%i4],%f42
.L77000378:
/* 0x0870 409 */ ldx [%i3+%o0],%l1
.L900000852:
/* 0x0874 409 */ add %i3,%o0,%l4
/* 0x0878 */ ldx [%l4+8],%i1
/* 0x087c */ cmp %l1,0
/* 0x0880 */ bne,pn %xcc,.L77000403
/* 0x0884 */ or %g0,0,%g5
.L77000402:
/* 0x0888 409 */ or %g0,0,%i3
/* 0x088c */ ba .L900000847
/* 0x0890 */ cmp %i1,0
.L77000403:
/* 0x0894 409 */ srlx %l1,52,%o5
/* 0x0898 */ sethi %hi(0xfff00000),%i3
/* 0x089c */ sllx %i3,32,%o2
/* 0x08a0 */ sethi %hi(0x40000000),%o0
/* 0x08a4 */ sllx %o0,22,%o4
/* 0x08a8 */ or %g0,1023,%l0
/* 0x08ac */ xor %o2,-1,%o3
/* 0x08b0 */ sub %l0,%o5,%o7
/* 0x08b4 */ and %l1,%o3,%l1
/* 0x08b8 */ add %o7,52,%i4
/* 0x08bc */ or %l1,%o4,%o1
/* 0x08c0 */ cmp %i1,0
/* 0x08c4 */ srlx %o1,%i4,%i3
.L900000847:
/* 0x08c8 409 */ bne,pn %xcc,.L77000409
/* 0x08cc */ or %g0,0,%o7
.L77000408:
/* 0x08d0 409 */ ba .L900000846
/* 0x08d4 350 */ cmp %g1,0
.L77000409:
/* 0x08d8 409 */ srlx %i1,52,%l2
/* 0x08dc */ sethi %hi(0xfff00000),%o7
/* 0x08e0 */ sllx %o7,32,%i4
/* 0x08e4 */ sethi %hi(0x40000000),%i5
/* 0x08e8 */ sllx %i5,22,%l6
/* 0x08ec */ or %g0,1023,%l5
/* 0x08f0 */ xor %i4,-1,%o1
/* 0x08f4 */ sub %l5,%l2,%g2
/* 0x08f8 */ and %i1,%o1,%l7
/* 0x08fc */ add %g2,52,%g3
/* 0x0900 */ or %l7,%l6,%g4
/* 0x0904 350 */ cmp %g1,0
/* 0x0908 409 */ srlx %g4,%g3,%o7
.L900000846:
/* 0x090c 350 */ ble,pn %icc,.L77000397
/* 0x0910 */ or %g0,0,%l5
.L77000510:
/* 0x0914 409 */ sethi %hi(0xfff00000),%g4
/* 0x0918 */ sllx %g4,32,%o0
/* 0x091c 0 */ or %g0,-1,%i5
/* 0x0920 409 */ srl %i5,0,%l7
/* 0x0924 */ sethi %hi(0x40000000),%i1
/* 0x0928 */ sllx %i1,22,%l6
/* 0x092c */ sethi %hi(0xfc00),%i4
/* 0x0930 */ xor %o0,-1,%g2
/* 0x0934 */ add %i4,1023,%l2
/* 0x0938 */ or %g0,2,%g4
/* 0x093c */ or %g0,%i2,%g3
.L77000395:
/* 0x0940 409 */ sra %g4,0,%o2
/* 0x0944 */ add %g4,1,%o3
/* 0x0948 */ sllx %o2,3,%o0
/* 0x094c */ sra %o3,0,%o5
/* 0x0950 */ ldx [%l4+%o0],%o4
/* 0x0954 */ sllx %o5,3,%l0
/* 0x0958 */ and %i3,%l7,%o1
/* 0x095c */ ldx [%l4+%l0],%i4
/* 0x0960 */ cmp %o4,0
/* 0x0964 */ bne,pn %xcc,.L77000415
/* 0x0968 350 */ and %o7,%l2,%i5
.L77000414:
/* 0x096c 409 */ or %g0,0,%l1
/* 0x0970 */ ba .L900000845
/* 0x0974 */ add %g5,%o1,%i1
.L77000415:
/* 0x0978 409 */ srlx %o4,52,%o3
/* 0x097c */ and %o4,%g2,%l1
/* 0x0980 */ or %g0,52,%o0
/* 0x0984 */ sub %o3,1023,%l0
/* 0x0988 */ or %l1,%l6,%o4
/* 0x098c */ sub %o0,%l0,%o5
/* 0x0990 */ srlx %o4,%o5,%l1
/* 0x0994 */ add %g5,%o1,%i1
.L900000845:
/* 0x0998 409 */ srax %i3,32,%g5
/* 0x099c */ cmp %i4,0
/* 0x09a0 */ bne,pn %xcc,.L77000421
/* 0x09a4 350 */ sllx %i5,16,%o2
.L77000420:
/* 0x09a8 409 */ or %g0,0,%o4
/* 0x09ac */ ba .L900000844
/* 0x09b0 350 */ add %i1,%o2,%o5
.L77000421:
/* 0x09b4 409 */ srlx %i4,52,%o4
/* 0x09b8 */ or %g0,52,%o0
/* 0x09bc */ sub %o4,1023,%o3
/* 0x09c0 */ and %i4,%g2,%i3
/* 0x09c4 */ or %i3,%l6,%o5
/* 0x09c8 */ sub %o0,%o3,%l0
/* 0x09cc */ srlx %o5,%l0,%o4
/* 0x09d0 350 */ add %i1,%o2,%o5
.L900000844:
/* 0x09d4 350 */ srax %o7,16,%i4
/* 0x09d8 */ srax %o5,32,%i5
/* 0x09dc */ add %i4,%i5,%o1
/* 0x09e0 */ add %l5,1,%l5
/* 0x09e4 */ and %o5,%l7,%i1
/* 0x09e8 */ add %g5,%o1,%g5
/* 0x09ec */ st %i1,[%g3]
/* 0x09f0 */ or %g0,%l1,%i3
/* 0x09f4 */ or %g0,%o4,%o7
/* 0x09f8 */ add %g4,2,%g4
/* 0x09fc */ cmp %l5,%l3
/* 0x0a00 */ ble,pt %icc,.L77000395
/* 0x0a04 */ add %g3,4,%g3
.L77000397:
/* 0x0a08 409 */ sethi %hi(0xfc00),%l4
/* 0x0a0c */ sra %l5,0,%i5
/* 0x0a10 */ add %l4,1023,%i1
/* 0x0a14 */ add %g5,%i3,%l5
/* 0x0a18 */ and %o7,%i1,%g5
/* 0x0a1c */ sllx %g5,16,%l2
/* 0x0a20 */ sllx %i5,2,%l7
/* 0x0a24 413 */ sra %g1,0,%g2
/* 0x0a28 409 */ add %l5,%l2,%l6
/* 0x0a2c */ st %l6,[%i2+%l7]
/* 0x0a30 413 */ sllx %g2,2,%g3
/* 0x0a34 */ ld [%i2+%g3],%g4
/* 0x0a38 */ cmp %g4,0
/* 0x0a3c */ bgu,pn %icc,.L77000486
/* 0x0a40 */ cmp %l3,0
.L77000427:
/* 0x0a44 413 */ bl,pn %icc,.L77000486
/* 0x0a48 */ or %g0,%l3,%i5
.L77000512:
/* 0x0a4c 413 */ sra %l3,0,%o5
/* 0x0a50 */ sllx %o5,2,%l7
/* 0x0a54 */ ld [%l7+%i0],%o5
/* 0x0a58 */ add %l7,%i2,%o1
/* 0x0a5c */ add %l7,%i0,%i4
.L900000843:
/* 0x0a60 413 */ ld [%o1],%i1
/* 0x0a64 */ cmp %i1,%o5
/* 0x0a68 */ bne,pn %icc,.L77000435
/* 0x0a6c */ sub %o1,4,%o1
.L77000431:
/* 0x0a70 413 */ sub %i4,4,%i4
/* 0x0a74 */ subcc %i5,1,%i5
/* 0x0a78 */ bpos,a,pt %icc,.L900000843
/* 0x0a7c */ ld [%i4],%o5
.L900000827:
/* 0x0a80 413 */ ba .L900000842
/* 0x0a84 350 */ cmp %g1,0
.L77000435:
/* 0x0a88 413 */ sra %i5,0,%o0
/* 0x0a8c */ sllx %o0,2,%l1
/* 0x0a90 */ ld [%i0+%l1],%i3
/* 0x0a94 */ ld [%i2+%l1],%l0
/* 0x0a98 */ cmp %l0,%i3
/* 0x0a9c */ bleu,pt %icc,.L77000379
/* 0x0aa0 */ nop
.L77000486:
/* 0x0aa4 350 */ cmp %g1,0
.L900000842:
/* 0x0aa8 350 */ ble,pn %icc,.L77000379
/* 0x0aac */ add %l3,1,%g3
.L77000511:
/* 0x0ab0 350 */ or %g0,0,%l5
/* 0x0ab4 */ cmp %g3,10
/* 0x0ab8 */ bl,pn %icc,.L77000487
/* 0x0abc */ or %g0,0,%g1
.L900000835:
/* 0x0ac0 350 */ prefetch [%i2],22
/* 0x0ac4 */ add %i0,4,%l2
/* 0x0ac8 */ prefetch [%i2+64],22
/* 0x0acc */ add %i2,8,%o5
/* 0x0ad0 */ sub %l3,7,%i0
/* 0x0ad4 */ prefetch [%i2+128],22
/* 0x0ad8 */ or %g0,2,%l5
/* 0x0adc */ prefetch [%i2+192],22
/* 0x0ae0 */ prefetch [%i2+256],22
/* 0x0ae4 */ prefetch [%i2+320],22
/* 0x0ae8 */ prefetch [%i2+384],22
/* 0x0aec */ ld [%l2-4],%l7
/* 0x0af0 */ ld [%o5-4],%l6
/* 0x0af4 */ prefetch [%o5+440],22
/* 0x0af8 */ prefetch [%o5+504],22
/* 0x0afc */ ld [%i2],%i2
/* 0x0b00 */ sub %i2,%l7,%g3
/* 0x0b04 */ st %g3,[%o5-8]
/* 0x0b08 */ srax %g3,32,%l7
.L900000833:
/* 0x0b0c 350 */ add %l5,8,%l5
/* 0x0b10 */ add %o5,32,%o5
/* 0x0b14 */ ld [%l2],%i5
/* 0x0b18 */ prefetch [%o5+496],22
/* 0x0b1c */ cmp %l5,%i0
/* 0x0b20 */ add %l2,32,%l2
/* 0x0b24 */ sub %l6,%i5,%g5
/* 0x0b28 */ add %g5,%l7,%o0
/* 0x0b2c */ ld [%o5-32],%l4
/* 0x0b30 */ st %o0,[%o5-36]
/* 0x0b34 */ srax %o0,32,%i3
/* 0x0b38 */ ld [%l2-28],%i1
/* 0x0b3c */ sub %l4,%i1,%i4
/* 0x0b40 */ add %i4,%i3,%o1
/* 0x0b44 */ ld [%o5-28],%o3
/* 0x0b48 */ st %o1,[%o5-32]
/* 0x0b4c */ srax %o1,32,%l1
/* 0x0b50 */ ld [%l2-24],%o2
/* 0x0b54 */ sub %o3,%o2,%g2
/* 0x0b58 */ add %g2,%l1,%o7
/* 0x0b5c */ ld [%o5-24],%l0
/* 0x0b60 */ st %o7,[%o5-28]
/* 0x0b64 */ srax %o7,32,%l6
/* 0x0b68 */ ld [%l2-20],%o4
/* 0x0b6c */ sub %l0,%o4,%g1
/* 0x0b70 */ add %g1,%l6,%l7
/* 0x0b74 */ ld [%o5-20],%i2
/* 0x0b78 */ st %l7,[%o5-24]
/* 0x0b7c */ srax %l7,32,%g4
/* 0x0b80 */ ld [%l2-16],%g3
/* 0x0b84 */ sub %i2,%g3,%i5
/* 0x0b88 */ add %i5,%g4,%g5
/* 0x0b8c */ ld [%o5-16],%i1
/* 0x0b90 */ st %g5,[%o5-20]
/* 0x0b94 */ srax %g5,32,%l4
/* 0x0b98 */ ld [%l2-12],%o0
/* 0x0b9c */ sub %i1,%o0,%i3
/* 0x0ba0 */ add %i3,%l4,%i4
/* 0x0ba4 */ ld [%o5-12],%o2
/* 0x0ba8 */ st %i4,[%o5-16]
/* 0x0bac */ srax %i4,32,%o3
/* 0x0bb0 */ ld [%l2-8],%o1
/* 0x0bb4 */ sub %o2,%o1,%l1
/* 0x0bb8 */ add %l1,%o3,%g2
/* 0x0bbc */ ld [%o5-8],%o4
/* 0x0bc0 */ st %g2,[%o5-12]
/* 0x0bc4 */ srax %g2,32,%l0
/* 0x0bc8 */ ld [%l2-4],%o7
/* 0x0bcc */ sub %o4,%o7,%l6
/* 0x0bd0 */ add %l6,%l0,%g1
/* 0x0bd4 */ ld [%o5-4],%l6
/* 0x0bd8 */ st %g1,[%o5-8]
/* 0x0bdc */ ble,pt %icc,.L900000833
/* 0x0be0 */ srax %g1,32,%l7
.L900000836:
/* 0x0be4 350 */ ld [%l2],%l0
/* 0x0be8 */ add %l2,4,%i0
/* 0x0bec */ or %g0,%o5,%i2
/* 0x0bf0 */ cmp %l5,%l3
/* 0x0bf4 */ sub %l6,%l0,%l6
/* 0x0bf8 */ add %l6,%l7,%g1
/* 0x0bfc */ st %g1,[%o5-4]
/* 0x0c00 */ bg,pn %icc,.L77000379
/* 0x0c04 */ srax %g1,32,%g1
.L77000487:
/* 0x0c08 350 */ ld [%i2],%o4
.L900000841:
/* 0x0c0c 350 */ ld [%i0],%i3
/* 0x0c10 */ add %g1,%o4,%l0
/* 0x0c14 */ add %l5,1,%l5
/* 0x0c18 */ cmp %l5,%l3
/* 0x0c1c */ add %i0,4,%i0
/* 0x0c20 */ sub %l0,%i3,%l6
/* 0x0c24 */ st %l6,[%i2]
/* 0x0c28 */ srax %l6,32,%g1
/* 0x0c2c */ add %i2,4,%i2
/* 0x0c30 */ ble,a,pt %icc,.L900000841
/* 0x0c34 */ ld [%i2],%o4
.L77000379:
/* 0x0c38 405 */ ret ! Result =
/* 0x0c3c */ restore %g0,%g0,%g0
/* 0x0c40 0 */ .type mont_mulf_noconv,2
/* 0x0c40 0 */ .size mont_mulf_noconv,(.-mont_mulf_noconv)
! Begin Disassembling Debug Info
.xstabs ".stab.index","V=10.0;DBG_GEN=4.14.14;cd;backend;Xa;O;R=Sun C 5.5 Patch 112760-07 2004/02/03",60,0,0,0
.xstabs ".stab.index","/workspace/ferenc/algorithms/bignum/unified/mont_mulf; /ws/onnv-tools/SUNWspro/SOS8/prod/bin/cc -D_KERNEL -DRF_INLINE_MACROS -fast -xarch=v9 -xO5 -xstrconst -xdepend -Xa -xchip=ultra3 -xcode=abs32 -Wc,-Qrm-Qd -Wc,-Qrm-Qf -Wc,-assembly -V -W0,-xp -c conv_v9.il -o mont_mulf.o mont_mulf.c",52,0,0,0
! End Disassembling Debug Info
! Begin Disassembling Ident
.ident "cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (NO SOURCE LINE)
.ident "@(#)mont_mulf.c\t1.2\t01/09/24 SMI" ! (/tmp/acompAAApja4Fx:8)
.ident "@(#)types.h\t1.74\t03/08/07 SMI" ! (/tmp/acompAAApja4Fx:9)
.ident "@(#)isa_defs.h\t1.20\t99/05/04 SMI" ! (/tmp/acompAAApja4Fx:10)
.ident "@(#)feature_tests.h\t1.18\t99/07/26 SMI" ! (/tmp/acompAAApja4Fx:11)
.ident "@(#)machtypes.h\t1.13\t99/05/04 SMI" ! (/tmp/acompAAApja4Fx:12)
.ident "@(#)inttypes.h\t1.2\t98/01/16 SMI" ! (/tmp/acompAAApja4Fx:13)
.ident "@(#)int_types.h\t1.6\t97/08/20 SMI" ! (/tmp/acompAAApja4Fx:14)
.ident "@(#)int_limits.h\t1.6\t99/08/06 SMI" ! (/tmp/acompAAApja4Fx:15)
.ident "@(#)int_const.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:16)
.ident "@(#)int_fmtio.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:17)
.ident "@(#)types32.h\t1.4\t98/02/13 SMI" ! (/tmp/acompAAApja4Fx:18)
.ident "@(#)select.h\t1.17\t01/08/15 SMI" ! (/tmp/acompAAApja4Fx:19)
.ident "@(#)math.h\t2.11\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:20)
.ident "@(#)math_iso.h\t1.2\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:21)
.ident "@(#)floatingpoint.h\t2.5\t99/06/22 SMI" ! (/tmp/acompAAApja4Fx:22)
.ident "@(#)stdio_tag.h\t1.3\t98/04/20 SMI" ! (/tmp/acompAAApja4Fx:23)
.ident "@(#)ieeefp.h\t2.8 99/10/29" ! (/tmp/acompAAApja4Fx:24)
.ident "acomp: Sun C 5.5 Patch 112760-07 2004/02/03" ! (/tmp/acompAAApja4Fx:57)
.ident "iropt: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (/tmp/acompAAApja4Fx:58)
.ident "cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (NO SOURCE LINE)
! End Disassembling Ident
#define FZERO \
fzero %f0 ;\
fzero %f2 ;\
faddd %f0, %f2, %f4 ;\
fmuld %f0, %f2, %f6 ;\
faddd %f0, %f2, %f8 ;\
fmuld %f0, %f2, %f10 ;\
faddd %f0, %f2, %f12 ;\
fmuld %f0, %f2, %f14 ;\
faddd %f0, %f2, %f16 ;\
fmuld %f0, %f2, %f18 ;\
faddd %f0, %f2, %f20 ;\
fmuld %f0, %f2, %f22 ;\
faddd %f0, %f2, %f24 ;\
fmuld %f0, %f2, %f26 ;\
faddd %f0, %f2, %f28 ;\
fmuld %f0, %f2, %f30 ;\
faddd %f0, %f2, %f32 ;\
fmuld %f0, %f2, %f34 ;\
faddd %f0, %f2, %f36 ;\
fmuld %f0, %f2, %f38 ;\
faddd %f0, %f2, %f40 ;\
fmuld %f0, %f2, %f42 ;\
faddd %f0, %f2, %f44 ;\
fmuld %f0, %f2, %f46 ;\
faddd %f0, %f2, %f48 ;\
fmuld %f0, %f2, %f50 ;\
faddd %f0, %f2, %f52 ;\
fmuld %f0, %f2, %f54 ;\
faddd %f0, %f2, %f56 ;\
fmuld %f0, %f2, %f58 ;\
faddd %f0, %f2, %f60 ;\
fmuld %f0, %f2, %f62
#include "assym.h"
/*
* In the routine below, we check/set FPRS_FEF bit since
* we don't want to take a fp_disabled trap. We need not
* check/set PSTATE_PEF bit as it is done early during boot.
*/
ENTRY(big_savefp)
rd %fprs, %o2
st %o2, [%o0 + FPU_FPRS]
andcc %o2, FPRS_FEF, %g0 ! is FPRS_FEF set?
bnz,a,pt %icc, .fregs_save ! yes, go to save
nop
wr %g0, FPRS_FEF, %fprs ! else, set the bit
stx %fsr, [%o0 + FPU_FSR] ! store %fsr
retl
nop
.fregs_save:
BSTORE_FPREGS(%o0, %o4)
stx %fsr, [%o0 + FPU_FSR] ! store %fsr
retl
nop
SET_SIZE(big_savefp)
ENTRY(big_restorefp)
ldx [%o0 + FPU_FSR], %fsr ! restore %fsr
ld [%o0 + FPU_FPRS], %o1
andcc %o1, FPRS_FEF, %g0 ! is FPRS_FEF set in saved %fprs?
bnz,pt %icc, .fregs_restore ! yes, go to restore
nop
FZERO ! zero out to avoid leaks
wr %g0, 0, %fprs
retl
nop
.fregs_restore:
BLOAD_FPREGS(%o0, %o2)
wr %o1, 0, %fprs
retl
nop
SET_SIZE(big_restorefp)
#endif /* lint || __lint */