fenv_inlines.h revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8
/*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*/
/*
* Copyright 2011, Richard Lowe
*/
#ifndef _FENV_INLINES_H
#define _FENV_INLINES_H
#ifdef __GNUC__
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/types.h>
#if defined(__x86)
/*
 * Floating point Control Word and Status Word, viewable either as one
 * 32-bit quantity (cwsw) or as its two 16-bit halves (words).
 *
 * Definition should actually be shared with x86
 * (much of this 'amd64' code can be, in fact.)
 *
 * NOTE(review): __fenv_getcwsw() stores the fstsw result through words.cw
 * and the fstcw result through words.sw, and __fenv_setcwsw() reads them
 * back the same way, so the member names look reversed relative to what
 * they hold -- confirm before relying on the names.
 */
union fp_cwsw {
uint32_t cwsw; /* both 16-bit words as a single 32-bit value */
struct {
uint16_t cw; /* first 16-bit half of cwsw */
uint16_t sw; /* second 16-bit half of cwsw */
} words;
};
/*
 * Capture the x87 status and control words into the 32-bit cell *value.
 *
 * fstsw stores through the words.cw member (first half of the cell) and
 * fstcw stores through the words.sw member (second half).
 */
extern __inline__ void
__fenv_getcwsw(unsigned int *value)
{
	union fp_cwsw *halves = (union fp_cwsw *)value;

	__asm__ __volatile__(
	    "fstsw %[first]\n\t"
	    "fstcw %[second]\n\t"
	    : [first] "=m" (halves->words.cw),
	    [second] "=m" (halves->words.sw));
}
/*
 * Install the two 16-bit words packed in *value into the x87 unit.
 *
 * The current x87 environment is saved to a local buffer with fstenv,
 * two 16-bit slots of that image are overwritten from *value, and the
 * patched image is reloaded with fldenv, followed by fwait.
 *
 * Operand map for the asm below:
 *   %0 = fenv (whole buffer)   %1 = fenv[0]   %2 = fenv[2]
 *   %3 = cwsw.words.cw         %4 = cwsw.words.sw
 * so words.sw lands in fenv[0] and words.cw lands in fenv[2].
 *
 * NOTE(review): fenv[0] and fenv[2] are presumably the control-word and
 * status-word slots of the saved environment image -- confirm against
 * the FSTENV memory layout in the Intel SDM.
 */
extern __inline__ void
__fenv_setcwsw(const unsigned int *value)
{
union fp_cwsw cwsw;
short fenv[16]; /* scratch buffer receiving the fstenv image */
cwsw.cwsw = *value;
__asm__ __volatile__(
"fstenv %0\n\t"
"movw %4,%1\n\t"
"movw %3,%2\n\t"
"fldenv %0\n\t"
"fwait\n\t"
: "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
: "r" (cwsw.words.cw), "r" (cwsw.words.sw)
/* For practical purposes, we clobber the whole FPU */
: "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
"st(6)", "st(7)");
}
/*
 * Store the current SSE control/status register (MXCSR) into *value.
 */
extern __inline__ void
__fenv_getmxcsr(unsigned int *value)
{
	__asm__ __volatile__(
	    "stmxcsr %[dst]"
	    : [dst] "=m" (*value));
}
/*
 * Load the SSE control/status register (MXCSR) from *value.
 */
extern __inline__ void
__fenv_setmxcsr(const unsigned int *value)
{
	__asm__ __volatile__(
	    "ldmxcsr %[src]"
	    :
	    : [src] "m" (*value));
}
/*
 * Execute the x87 F2XM1 instruction on x and return what it leaves in
 * ST(0) (2 to the power x, minus 1).
 */
extern __inline__ long double
f2xm1(long double x)
{
	/* "+t" keeps x in ST(0) as both the operand and the result. */
	__asm__ __volatile__("f2xm1" : "+t" (x) : : "cc");
	return (x);
}
/*
 * Execute the x87 FYL2X instruction with x in ST(0) and y in ST(1) and
 * return the value left on top of the stack (y * log2(x)).
 */
extern __inline__ long double
fyl2x(long double y, long double x)
{
	/* The instruction pops ST(1), hence the "st(1)" clobber. */
	__asm__ __volatile__("fyl2x"
	    : "+t" (x)
	    : "u" (y)
	    : "st(1)", "cc");
	return (x);
}
/*
 * Execute the x87 FPTAN instruction on x and return the tangent.
 */
extern __inline__ long double
fptan(long double x)
{
	/*
	 * fptan replaces ST(0) with the tangent and then pushes 1.0, so on
	 * completion the 1.0 is in ST(0) and the tangent in ST(1).  Both are
	 * declared as outputs so that both get popped; the 1.0 is discarded.
	 */
	long double one;
	long double tangent;

	__asm__ __volatile__("fptan"
	    : "=t" (one), "=u" (tangent)
	    : "0" (x)
	    : "cc");
	return (tangent);
}
/*
 * Execute the x87 FPATAN instruction with y in ST(0) and x in ST(1) and
 * return the value left on top of the stack, i.e. the arctangent of
 * ST(1)/ST(0) = x/y.
 */
extern __inline__ long double
fpatan(long double x, long double y)
{
	/* The instruction pops ST(1), hence the "st(1)" clobber. */
	__asm__ __volatile__("fpatan"
	    : "+t" (y)
	    : "u" (x)
	    : "st(1)", "cc");
	return (y);
}
/*
 * Execute the x87 FXTRACT instruction on x and return the significand.
 *
 * FXTRACT replaces ST(0) with the exponent and then pushes the
 * significand, so it finishes with the significand in ST(0) and the
 * exponent in ST(1).  Both must be declared as outputs: describing only
 * ST(0) leaves the extra value on the x87 register stack after every
 * call, and the stack overflows once eight values have been leaked.
 * The exponent is popped into a scratch variable and discarded so the
 * return value stays what it always was (the new ST(0)).
 */
extern __inline__ long double
fxtract(long double x)
{
	long double exponent;

	__asm__ __volatile__("fxtract"
	    : "+t" (x), "=u" (exponent)
	    :
	    : "cc");
	return (x);
}
/*
 * Execute the x87 FPREM1 instruction with div in ST(0) and idend in
 * ST(1) and return the value left in ST(0).
 *
 * NOTE(review): FPREM1 takes the remainder of ST(0) by ST(1), so as
 * wired here the result is "div rem idend", which looks reversed
 * relative to the parameter names -- confirm against the callers.
 */
extern __inline__ long double
fprem1(long double idend, long double div)
{
	long double rem;

	__asm__ __volatile__("fprem1"
	    : "=t" (rem)
	    : "0" (div), "u" (idend)
	    : "cc");
	return (rem);
}
/*
 * Execute the x87 FPREM instruction with div in ST(0) and idend in ST(1)
 * and return the value left in ST(0).
 *
 * NOTE(review): FPREM takes the (truncating) remainder of ST(0) by
 * ST(1), so as wired here the result is "div rem idend", which looks
 * reversed relative to the parameter names -- confirm against the
 * callers.
 */
extern __inline__ long double
fprem(long double idend, long double div)
{
	long double rem;

	__asm__ __volatile__("fprem"
	    : "=t" (rem)
	    : "0" (div), "u" (idend)
	    : "cc");
	return (rem);
}
/*
 * Execute the x87 FYL2XP1 instruction with x in ST(0) and y in ST(1) and
 * return the value left on top of the stack (y * log2(x + 1)).
 */
extern __inline__ long double
fyl2xp1(long double y, long double x)
{
	/* The instruction pops ST(1), hence the "st(1)" clobber. */
	__asm__ __volatile__("fyl2xp1"
	    : "+t" (x)
	    : "u" (y)
	    : "st(1)", "cc");
	return (x);
}
/*
 * Execute the x87 FSQRT instruction on x and return the square root it
 * leaves in ST(0).
 */
extern __inline__ long double
fsqrt(long double x)
{
	long double root;

	__asm__ __volatile__("fsqrt" : "=t" (root) : "0" (x) : "cc");
	return (root);
}
/*
 * Execute the x87 FSINCOS instruction on x and return the value it
 * leaves in ST(0).
 *
 * FSINCOS replaces ST(0) with the sine and then pushes the cosine, so
 * the returned ST(0) is the cosine; the sine, left in ST(1), is popped
 * into a scratch variable and discarded.
 */
extern __inline__ long double
fsincos(long double x)
{
	long double cosine;
	long double sine;

	__asm__ __volatile__("fsincos"
	    : "=t" (cosine), "=u" (sine)
	    : "0" (x)
	    : "cc");
	return (cosine);
}
/*
 * Execute the x87 FRNDINT instruction on x and return the integral value
 * it leaves in ST(0) (rounded per the current x87 rounding mode).
 */
extern __inline__ long double
frndint(long double x)
{
	long double rounded;

	__asm__ __volatile__("frndint" : "=t" (rounded) : "0" (x) : "cc");
	return (rounded);
}
/*
 * Execute the x87 FSCALE instruction with y in ST(0) and x in ST(1) and
 * return the value left in ST(0), i.e. y scaled by 2 to the power of
 * the truncated x.  FSCALE does not pop, so ST(1) is a plain input.
 */
extern __inline__ long double
fscale(long double x, long double y)
{
	__asm__ __volatile__("fscale" : "+t" (y) : "u" (x) : "cc");
	return (y);
}
/*
 * Execute the x87 FSIN instruction on x and return the sine it leaves in
 * ST(0).
 */
extern __inline__ long double
fsin(long double x)
{
	long double sine;

	__asm__ __volatile__("fsin" : "=t" (sine) : "0" (x) : "cc");
	return (sine);
}
/*
 * Execute the x87 FCOS instruction on x and return the cosine it leaves
 * in ST(0).
 */
extern __inline__ long double
fcos(long double x)
{
	long double cosine;

	__asm__ __volatile__("fcos" : "=t" (cosine) : "0" (x) : "cc");
	return (cosine);
}
/*
 * Compare *f1 with *f2 using CMPEQSS and store the resulting 32-bit mask
 * (all ones when equal, zero otherwise) in *i1.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *f1 itself as a read-write "+x" operand would make the
 * compiler write the mask back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *f2 is read.
 */
extern __inline__ void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
	float tmp;

	__asm__ __volatile__(
	    "movss %2, %1\n\t"
	    "cmpeqss %3, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "=&x" (tmp)
	    : "m" (*f1), "x" (*f2)
	    : "cc");
}
/*
 * Compare *f1 with *f2 using CMPLTSS and store the resulting 32-bit mask
 * (all ones when *f1 < *f2, zero otherwise) in *i1.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *f1 itself as a read-write "+x" operand would make the
 * compiler write the mask back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *f2 is read.
 */
extern __inline__ void
sse_cmpltss(float *f1, float *f2, int *i1)
{
	float tmp;

	__asm__ __volatile__(
	    "movss %2, %1\n\t"
	    "cmpltss %3, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "=&x" (tmp)
	    : "m" (*f1), "x" (*f2)
	    : "cc");
}
/*
 * Compare *f1 with *f2 using CMPLESS and store the resulting 32-bit mask
 * (all ones when *f1 <= *f2, zero otherwise) in *i1.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *f1 itself as a read-write "+x" operand would make the
 * compiler write the mask back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *f2 is read.
 */
extern __inline__ void
sse_cmpless(float *f1, float *f2, int *i1)
{
	float tmp;

	__asm__ __volatile__(
	    "movss %2, %1\n\t"
	    "cmpless %3, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "=&x" (tmp)
	    : "m" (*f1), "x" (*f2)
	    : "cc");
}
/*
 * Compare *f1 with *f2 using CMPUNORDSS and store the resulting 32-bit
 * mask (all ones when the operands are unordered, i.e. at least one is a
 * NaN; zero otherwise) in *i1.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *f1 itself as a read-write "+x" operand would make the
 * compiler write the mask back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *f2 is read.
 */
extern __inline__ void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
	float tmp;

	__asm__ __volatile__(
	    "movss %2, %1\n\t"
	    "cmpunordss %3, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "=&x" (tmp)
	    : "m" (*f1), "x" (*f2)
	    : "cc");
}
/*
 * Compute MINSS with *f1 as the destination operand and *f2 as the
 * source, and store the single-precision result in *f3.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *f1 itself as a read-write "+x" operand would make the
 * compiler write the result back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *f2 is read.
 */
extern __inline__ void
sse_minss(float *f1, float *f2, float *f3)
{
	float tmp;

	__asm__ __volatile__(
	    "movss %2, %1\n\t"
	    "minss %3, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "=&x" (tmp)
	    : "m" (*f1), "x" (*f2));
}
/*
 * Compute MAXSS with *f1 as the destination operand and *f2 as the
 * source, and store the single-precision result in *f3.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *f1 itself as a read-write "+x" operand would make the
 * compiler write the result back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *f2 is read.
 */
extern __inline__ void
sse_maxss(float *f1, float *f2, float *f3)
{
	float tmp;

	__asm__ __volatile__(
	    "movss %2, %1\n\t"
	    "maxss %3, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "=&x" (tmp)
	    : "m" (*f1), "x" (*f2));
}
/*
 * Compute *f1 + *f2 with ADDSS and store the single-precision sum in
 * *f3.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *f1 itself as a read-write "+x" operand would make the
 * compiler write the sum back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *f2 is read.
 */
extern __inline__ void
sse_addss(float *f1, float *f2, float *f3)
{
	float tmp;

	__asm__ __volatile__(
	    "movss %2, %1\n\t"
	    "addss %3, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "=&x" (tmp)
	    : "m" (*f1), "x" (*f2));
}
/*
 * Compute *f1 - *f2 with SUBSS and store the single-precision difference
 * in *f3.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *f1 itself as a read-write "+x" operand would make the
 * compiler write the difference back over the caller's operand.
 * The scratch is early-clobber because it is written before *f2 is read.
 */
extern __inline__ void
sse_subss(float *f1, float *f2, float *f3)
{
	float tmp;

	__asm__ __volatile__(
	    "movss %2, %1\n\t"
	    "subss %3, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "=&x" (tmp)
	    : "m" (*f1), "x" (*f2));
}
/*
 * Compute *f1 * *f2 with MULSS and store the single-precision product in
 * *f3.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *f1 itself as a read-write "+x" operand would make the
 * compiler write the product back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *f2 is read.
 */
extern __inline__ void
sse_mulss(float *f1, float *f2, float *f3)
{
	float tmp;

	__asm__ __volatile__(
	    "movss %2, %1\n\t"
	    "mulss %3, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "=&x" (tmp)
	    : "m" (*f1), "x" (*f2));
}
/*
 * Compute *f1 / *f2 with DIVSS and store the single-precision quotient
 * in *f3.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *f1 itself as a read-write "+x" operand would make the
 * compiler write the quotient back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *f2 is read.
 */
extern __inline__ void
sse_divss(float *f1, float *f2, float *f3)
{
	float tmp;

	__asm__ __volatile__(
	    "movss %2, %1\n\t"
	    "divss %3, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "=&x" (tmp)
	    : "m" (*f1), "x" (*f2));
}
/*
 * Compute the single-precision square root of *f1 with SQRTSS and store
 * it in *f2.  The result passes through a scratch SSE register.
 */
extern __inline__ void
sse_sqrtss(float *f1, float *f2)
{
	float root;

	__asm__ __volatile__(
	    "sqrtss %[src], %[reg]\n\t"
	    "movss %[reg], %[dst]"
	    : [dst] "=m" (*f2), [reg] "=x" (root)
	    : [src] "m" (*f1));
}
/*
 * Execute UCOMISS on *f1 and *f2.
 *
 * Nothing is returned: the comparison result the instruction leaves in
 * EFLAGS is discarded, so the only lasting effect is whatever exception
 * flags the instruction raises in MXCSR.  Because the instruction does
 * write EFLAGS, the asm declares a "cc" clobber, in line with the other
 * flag-modifying inlines in this file.
 */
extern __inline__ void
sse_ucomiss(float *f1, float *f2)
{
	__asm__ __volatile__("ucomiss %1, %0"
	    :
	    : "x" (*f1), "x" (*f2)
	    : "cc");
}
/*
 * Execute COMISS on *f1 and *f2.
 *
 * Nothing is returned: the comparison result the instruction leaves in
 * EFLAGS is discarded, so the only lasting effect is whatever exception
 * flags the instruction raises in MXCSR.  Because the instruction does
 * write EFLAGS, the asm declares a "cc" clobber, in line with the other
 * flag-modifying inlines in this file.
 */
extern __inline__ void
sse_comiss(float *f1, float *f2)
{
	__asm__ __volatile__("comiss %1, %0"
	    :
	    : "x" (*f1), "x" (*f2)
	    : "cc");
}
/*
 * Convert the single-precision value *f1 to double precision with
 * CVTSS2SD and store it in *d1.  The result passes through a scratch
 * SSE register.
 */
extern __inline__ void
sse_cvtss2sd(float *f1, double *d1)
{
	double widened;

	__asm__ __volatile__(
	    "cvtss2sd %[src], %[reg]\n\t"
	    "movsd %[reg], %[dst]"
	    : [dst] "=m" (*d1), [reg] "=x" (widened)
	    : [src] "m" (*f1));
}
/*
 * Convert the 32-bit integer *i1 to single precision with CVTSI2SS and
 * store it in *f1.  The result passes through a scratch SSE register.
 */
extern __inline__ void
sse_cvtsi2ss(int *i1, float *f1)
{
	float converted;

	__asm__ __volatile__(
	    "cvtsi2ss %[src], %[reg]\n\t"
	    "movss %[reg], %[dst]"
	    : [dst] "=m" (*f1), [reg] "=x" (converted)
	    : [src] "m" (*i1));
}
/*
 * Convert the single-precision value *f1 to a 32-bit integer with
 * CVTTSS2SI (truncating) and store it in *i1.  The result passes through
 * a scratch general-purpose register.
 */
extern __inline__ void
sse_cvttss2si(float *f1, int *i1)
{
	int truncated;

	__asm__ __volatile__(
	    "cvttss2si %[src], %[reg]\n\t"
	    "movl %[reg], %[dst]"
	    : [dst] "=m" (*i1), [reg] "=r" (truncated)
	    : [src] "m" (*f1));
}
/*
 * Convert the single-precision value *f1 to a 32-bit integer with
 * CVTSS2SI (rounding per the current MXCSR rounding mode) and store it
 * in *i1.  The result passes through a scratch general-purpose register.
 */
extern __inline__ void
sse_cvtss2si(float *f1, int *i1)
{
	int rounded;

	__asm__ __volatile__(
	    "cvtss2si %[src], %[reg]\n\t"
	    "movl %[reg], %[dst]"
	    : [dst] "=m" (*i1), [reg] "=r" (rounded)
	    : [src] "m" (*f1));
}
#if defined(__amd64)
/*
 * Convert the 64-bit integer *ll1 to single precision with CVTSI2SSQ and
 * store it in *f1.  The result passes through a scratch SSE register.
 */
extern __inline__ void
sse_cvtsi2ssq(long long *ll1, float *f1)
{
	float converted;

	__asm__ __volatile__(
	    "cvtsi2ssq %[src], %[reg]\n\t"
	    "movss %[reg], %[dst]"
	    : [dst] "=m" (*f1), [reg] "=x" (converted)
	    : [src] "m" (*ll1));
}
/*
 * Convert the single-precision value *f1 to a 64-bit integer with
 * CVTTSS2SIQ (truncating) and store it in *ll1.  The result passes
 * through a scratch 64-bit general-purpose register.
 */
extern __inline__ void
sse_cvttss2siq(float *f1, long long *ll1)
{
	long long truncated;

	__asm__ __volatile__(
	    "cvttss2siq %[src], %[reg]\n\t"
	    "movq %[reg], %[dst]"
	    : [dst] "=m" (*ll1), [reg] "=r" (truncated)
	    : [src] "m" (*f1));
}
/*
 * Convert the single-precision value *f1 to a 64-bit integer with
 * CVTSS2SIQ (rounding per the current MXCSR rounding mode) and store it
 * in *ll1.  The result passes through a scratch 64-bit general-purpose
 * register.
 */
extern __inline__ void
sse_cvtss2siq(float *f1, long long *ll1)
{
	long long rounded;

	__asm__ __volatile__(
	    "cvtss2siq %[src], %[reg]\n\t"
	    "movq %[reg], %[dst]"
	    : [dst] "=m" (*ll1), [reg] "=r" (rounded)
	    : [src] "m" (*f1));
}
#endif
/*
 * Compare *d1 with *d2 using CMPEQSD and store the resulting 64-bit mask
 * (all ones when equal, zero otherwise) in *ll1.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *d1 itself as a read-write "+x" operand would make the
 * compiler write the mask back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *d2 is read.
 */
extern __inline__ void
sse_cmpeqsd(double *d1, double *d2, long long *ll1)
{
	double tmp;

	__asm__ __volatile__(
	    "movsd %2,%1\n\t"
	    "cmpeqsd %3,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "=&x" (tmp)
	    : "m" (*d1), "x" (*d2));
}
/*
 * Compare *d1 with *d2 using CMPLTSD and store the resulting 64-bit mask
 * (all ones when *d1 < *d2, zero otherwise) in *ll1.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *d1 itself as a read-write "+x" operand would make the
 * compiler write the mask back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *d2 is read.
 */
extern __inline__ void
sse_cmpltsd(double *d1, double *d2, long long *ll1)
{
	double tmp;

	__asm__ __volatile__(
	    "movsd %2,%1\n\t"
	    "cmpltsd %3,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "=&x" (tmp)
	    : "m" (*d1), "x" (*d2));
}
/*
 * Compare *d1 with *d2 using CMPLESD and store the resulting 64-bit mask
 * (all ones when *d1 <= *d2, zero otherwise) in *ll1.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *d1 itself as a read-write "+x" operand would make the
 * compiler write the mask back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *d2 is read.
 */
extern __inline__ void
sse_cmplesd(double *d1, double *d2, long long *ll1)
{
	double tmp;

	__asm__ __volatile__(
	    "movsd %2,%1\n\t"
	    "cmplesd %3,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "=&x" (tmp)
	    : "m" (*d1), "x" (*d2));
}
/*
 * Compare *d1 with *d2 using CMPUNORDSD and store the resulting 64-bit
 * mask (all ones when the operands are unordered, i.e. at least one is a
 * NaN; zero otherwise) in *ll1.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *d1 itself as a read-write "+x" operand would make the
 * compiler write the mask back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *d2 is read.
 */
extern __inline__ void
sse_cmpunordsd(double *d1, double *d2, long long *ll1)
{
	double tmp;

	__asm__ __volatile__(
	    "movsd %2,%1\n\t"
	    "cmpunordsd %3,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "=&x" (tmp)
	    : "m" (*d1), "x" (*d2));
}
/*
 * Compute MINSD with *d1 as the destination operand and *d2 as the
 * source, and store the double-precision result in *d3.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *d1 itself as a read-write "+x" operand would make the
 * compiler write the result back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *d2 is read.
 */
extern __inline__ void
sse_minsd(double *d1, double *d2, double *d3)
{
	double tmp;

	__asm__ __volatile__(
	    "movsd %2,%1\n\t"
	    "minsd %3,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "=&x" (tmp)
	    : "m" (*d1), "x" (*d2));
}
/*
 * Compute MAXSD with *d1 as the destination operand and *d2 as the
 * source, and store the double-precision result in *d3.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *d1 itself as a read-write "+x" operand would make the
 * compiler write the result back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *d2 is read.
 */
extern __inline__ void
sse_maxsd(double *d1, double *d2, double *d3)
{
	double tmp;

	__asm__ __volatile__(
	    "movsd %2,%1\n\t"
	    "maxsd %3,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "=&x" (tmp)
	    : "m" (*d1), "x" (*d2));
}
/*
 * Compute *d1 + *d2 with ADDSD and store the double-precision sum in
 * *d3.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *d1 itself as a read-write "+x" operand would make the
 * compiler write the sum back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *d2 is read.
 */
extern __inline__ void
sse_addsd(double *d1, double *d2, double *d3)
{
	double tmp;

	__asm__ __volatile__(
	    "movsd %2,%1\n\t"
	    "addsd %3,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "=&x" (tmp)
	    : "m" (*d1), "x" (*d2));
}
/*
 * Compute *d1 - *d2 with SUBSD and store the double-precision difference
 * in *d3.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *d1 itself as a read-write "+x" operand would make the
 * compiler write the difference back over the caller's operand.
 * The scratch is early-clobber because it is written before *d2 is read.
 */
extern __inline__ void
sse_subsd(double *d1, double *d2, double *d3)
{
	double tmp;

	__asm__ __volatile__(
	    "movsd %2,%1\n\t"
	    "subsd %3,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "=&x" (tmp)
	    : "m" (*d1), "x" (*d2));
}
/*
 * Compute *d1 * *d2 with MULSD and store the double-precision product in
 * *d3.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *d1 itself as a read-write "+x" operand would make the
 * compiler write the product back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *d2 is read.
 */
extern __inline__ void
sse_mulsd(double *d1, double *d2, double *d3)
{
	double tmp;

	__asm__ __volatile__(
	    "movsd %2,%1\n\t"
	    "mulsd %3,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "=&x" (tmp)
	    : "m" (*d1), "x" (*d2));
}
/*
 * Compute *d1 / *d2 with DIVSD and store the double-precision quotient
 * in *d3.
 *
 * The first operand is loaded into a scratch register inside the asm.
 * Binding *d1 itself as a read-write "+x" operand would make the
 * compiler write the quotient back over the caller's operand afterwards.
 * The scratch is early-clobber because it is written before *d2 is read.
 */
extern __inline__ void
sse_divsd(double *d1, double *d2, double *d3)
{
	double tmp;

	__asm__ __volatile__(
	    "movsd %2,%1\n\t"
	    "divsd %3,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "=&x" (tmp)
	    : "m" (*d1), "x" (*d2));
}
/*
 * Compute the double-precision square root of *d1 with SQRTSD and store
 * it in *d2.  The result passes through a scratch SSE register.
 */
extern __inline__ void
sse_sqrtsd(double *d1, double *d2)
{
	double root;

	__asm__ __volatile__(
	    "sqrtsd %[src], %[reg]\n\t"
	    "movsd %[reg], %[dst]"
	    : [dst] "=m" (*d2), [reg] "=x" (root)
	    : [src] "m" (*d1));
}
/*
 * Execute UCOMISD on *d1 and *d2.
 *
 * Nothing is returned: the comparison result the instruction leaves in
 * EFLAGS is discarded, so the only lasting effect is whatever exception
 * flags the instruction raises in MXCSR.  Because the instruction does
 * write EFLAGS, the asm declares a "cc" clobber, in line with the other
 * flag-modifying inlines in this file.
 */
extern __inline__ void
sse_ucomisd(double *d1, double *d2)
{
	__asm__ __volatile__("ucomisd %1, %0"
	    :
	    : "x" (*d1), "x" (*d2)
	    : "cc");
}
/*
 * Execute COMISD on *d1 and *d2.
 *
 * Nothing is returned: the comparison result the instruction leaves in
 * EFLAGS is discarded, so the only lasting effect is whatever exception
 * flags the instruction raises in MXCSR.  Because the instruction does
 * write EFLAGS, the asm declares a "cc" clobber, in line with the other
 * flag-modifying inlines in this file.
 */
extern __inline__ void
sse_comisd(double *d1, double *d2)
{
	__asm__ __volatile__("comisd %1, %0"
	    :
	    : "x" (*d1), "x" (*d2)
	    : "cc");
}
/*
 * Convert the double-precision value *d1 to single precision with
 * CVTSD2SS and store it in *f1.  The result passes through a scratch
 * SSE register.
 */
extern __inline__ void
sse_cvtsd2ss(double *d1, float *f1)
{
	float narrowed;

	__asm__ __volatile__(
	    "cvtsd2ss %[src],%[reg]\n\t"
	    "movss %[reg],%[dst]"
	    : [dst] "=m" (*f1), [reg] "=x" (narrowed)
	    : [src] "m" (*d1));
}
/*
 * Convert the 32-bit integer *i1 to double precision with CVTSI2SD and
 * store it in *d1.  The result passes through a scratch SSE register.
 */
extern __inline__ void
sse_cvtsi2sd(int *i1, double *d1)
{
	double converted;

	__asm__ __volatile__(
	    "cvtsi2sd %[src],%[reg]\n\t"
	    "movsd %[reg],%[dst]"
	    : [dst] "=m" (*d1), [reg] "=x" (converted)
	    : [src] "m" (*i1));
}
/*
 * Convert the double-precision value *d1 to a 32-bit integer with
 * CVTTSD2SI (truncating) and store it in *i1.  The result passes through
 * a scratch general-purpose register.
 */
extern __inline__ void
sse_cvttsd2si(double *d1, int *i1)
{
	int truncated;

	__asm__ __volatile__(
	    "cvttsd2si %[src],%[reg]\n\t"
	    "movl %[reg],%[dst]"
	    : [dst] "=m" (*i1), [reg] "=r" (truncated)
	    : [src] "m" (*d1));
}
/*
 * Convert the double-precision value *d1 to a 32-bit integer with
 * CVTSD2SI (rounding per the current MXCSR rounding mode) and store it
 * in *i1.  The result passes through a scratch general-purpose register.
 */
extern __inline__ void
sse_cvtsd2si(double *d1, int *i1)
{
	int rounded;

	__asm__ __volatile__(
	    "cvtsd2si %[src],%[reg]\n\t"
	    "movl %[reg],%[dst]"
	    : [dst] "=m" (*i1), [reg] "=r" (rounded)
	    : [src] "m" (*d1));
}
#if defined(__amd64)
/*
 * Convert the 64-bit integer *ll1 to double precision with CVTSI2SDQ and
 * store it in *d1.  The result passes through a scratch SSE register.
 */
extern __inline__ void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
	double converted;

	__asm__ __volatile__(
	    "cvtsi2sdq %[src],%[reg]\n\t"
	    "movsd %[reg],%[dst]"
	    : [dst] "=m" (*d1), [reg] "=x" (converted)
	    : [src] "m" (*ll1));
}
/*
 * Convert the double-precision value *d1 to a 64-bit integer with
 * CVTTSD2SIQ (truncating) and store it in *ll1.  The result passes
 * through a scratch 64-bit general-purpose register.
 */
extern __inline__ void
sse_cvttsd2siq(double *d1, long long *ll1)
{
	long long truncated;

	__asm__ __volatile__(
	    "cvttsd2siq %[src],%[reg]\n\t"
	    "movq %[reg],%[dst]"
	    : [dst] "=m" (*ll1), [reg] "=r" (truncated)
	    : [src] "m" (*d1));
}
/*
 * Convert the double-precision value *d1 to a 64-bit integer with
 * CVTSD2SIQ (rounding per the current MXCSR rounding mode) and store it
 * in *ll1.  The result passes through a scratch 64-bit general-purpose
 * register.
 */
extern __inline__ void
sse_cvtsd2siq(double *d1, long long *ll1)
{
	long long rounded;

	__asm__ __volatile__(
	    "cvtsd2siq %[src],%[reg]\n\t"
	    "movq %[reg],%[dst]"
	    : [dst] "=m" (*ll1), [reg] "=r" (rounded)
	    : [src] "m" (*d1));
}
#endif
#elif defined(__sparc)
/*
 * Store the SPARC floating-point state register (%fsr) into *l.
 *
 * When __sparcv9 is defined the stx form of the store is assembled,
 * otherwise the st form; presumably this tracks the width of
 * unsigned long in each build -- confirm against the SPARC manual.
 */
extern __inline__ void
__fenv_getfsr(unsigned long *l)
{
__asm__ __volatile__(
#if defined(__sparcv9)
"stx %%fsr,%0\n\t"
#else
"st %%fsr,%0\n\t"
#endif
: "=m" (*l));
}
/*
 * Load the SPARC floating-point state register (%fsr) from *l.
 *
 * When __sparcv9 is defined the ldx form of the load is assembled,
 * otherwise the ld form.  The asm declares a "cc" clobber.
 */
extern __inline__ void
__fenv_setfsr(const unsigned long *l)
{
__asm__ __volatile__(
#if defined(__sparcv9)
"ldx %0,%%fsr\n\t"
#else
"ld %0,%%fsr\n\t"
#endif
: : "m" (*l) : "cc");
}
/*
 * Store the SPARC floating-point state register (%fsr) into the
 * unsigned int *l, always using the st form of the store regardless of
 * __sparcv9.
 */
extern __inline__ void
__fenv_getfsr32(unsigned int *l)
{
__asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}
/*
 * Load the SPARC floating-point state register (%fsr) from the
 * unsigned int *l, always using the ld form of the load regardless of
 * __sparcv9.
 *
 * NOTE(review): unlike __fenv_setfsr(), this asm declares no "cc"
 * clobber -- confirm whether the difference is intentional.
 */
extern __inline__ void
__fenv_setfsr32(const unsigned int *l)
{
__asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
}
#else
#error "GCC FENV inlines not implemented for this platform"
#endif
#ifdef __cplusplus
}
#endif
#endif /* __GNUC__ */
#endif /* _FENV_INLINES_H */