/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
/* All Rights Reserved */
/* Copyright (c) 1987, 1988 Microsoft Corporation */
/* All Rights Reserved */
/*
* Copyright (c) 2009, Intel Corporation.
* All rights reserved.
*/
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>
#if defined(__lint)
#include <sys/types.h>
#include <sys/fp.h>
#else
#include "assym.h"
#endif
#if defined(__lint)

/* lint-only C stand-in for the assembly routine below */
uint_t
fpu_initial_probe(void)
{
	return (0);
}

#else	/* __lint */

/*
 * uint_t fpu_initial_probe(void)
 *
 * Reinitializes the x87 unit with fninit, reads the status word back and
 * returns its low byte zero-extended in %eax.  A result of zero means an
 * x87 "chip" is present.
 */
	ENTRY_NP(fpu_initial_probe)
	CLTS
	fninit
	fnstsw	%ax			/* %ax = x87 status word */
	movzbl	%al, %eax		/* keep only the low 8 bits */
	ret
	SET_SIZE(fpu_initial_probe)

#endif	/* __lint */
#if defined(__lint)

/* lint-only C stand-in for the assembly routine below */
/*ARGSUSED*/
void
fxsave_insn(struct fxsave_state *fx)
{
}

#else	/* __lint */

/*
 * void fxsave_insn(struct fxsave_state *fx)
 *
 * Issues a single fxsave into the buffer at fx and returns.  No CLTS,
 * STTS or fninit is performed here.
 */
#if defined(__amd64)

	ENTRY_NP(fxsave_insn)
	FXSAVEQ	((%rdi))		/* fx arrives in %rdi */
	ret
	SET_SIZE(fxsave_insn)

#elif defined(__i386)

	ENTRY_NP(fxsave_insn)
	movl	4(%esp), %eax		/* fx is the first stack argument */
	fxsave	(%eax)
	ret
	SET_SIZE(fxsave_insn)

#endif
#endif	/* __lint */
#if defined(__i386)
/*
* If (num1/num2 > num1/num3) the FPU has the FDIV bug.
*/
#if defined(__lint)

/* lint-only C stand-in for the assembly routine below */
int
fpu_probe_pentium_fdivbug(void)
{
	return (0);
}

#else	/* __lint */

/*
 * int fpu_probe_pentium_fdivbug(void)
 *
 * Forms two quotients on the x87 stack from the constants below (num1
 * against num2, then num1 against num3), compares them with fcompp and
 * copies the resulting condition codes into EFLAGS.  Returns 1 when the
 * carry flag ends up set, which is the FDIV-bug case, and 0 otherwise.
 */
	ENTRY_NP(fpu_probe_pentium_fdivbug)
	fldl	.num1			/* st0 = num1 */
	fldl	.num2			/* st0 = num2, st1 = num1 */
	fdivr	%st(1), %st		/* first quotient replaces st0 */
	fxch	%st(1)			/* bring num1 back to the top */
	fdivl	.num3			/* second quotient in st0 */
	fcompp				/* compare st0 with st1, pop both */
	fstsw	%ax
	sahf				/* x87 condition codes -> EFLAGS */
	jb	1f			/* carry set: the bug is present */
	xorl	%eax, %eax
	ret

1:	movl	$1, %eax
	ret

	/* IEEE double constants, low word first */
	.align	4
.num1:	.4byte	0xbce4217d		/* 4.999999 */
	.4byte	0x4013ffff
.num2:	.4byte	0x0			/* 15.0 */
	.4byte	0x402e0000
.num3:	.4byte	0xde7210bf		/* 14.999999 */
	.4byte	0x402dffff
	SET_SIZE(fpu_probe_pentium_fdivbug)

#endif	/* __lint */
/*
* To cope with processors that do not implement fxsave/fxrstor
* instructions, patch hot paths in the kernel to use them only
* when that feature has been detected.
*/
#if defined(__lint)
/* lint-only empty C stand-ins for the assembly patch routines */
void
patch_sse(void)
{
}

void
patch_sse2(void)
{
}

void
patch_xsave(void)
{
}
#else /* __lint */
/*
 * void patch_sse(void)
 *
 * Invokes _HOT_PATCH twice between _HOT_PATCH_PROLOG and
 * _HOT_PATCH_EPILOG.  Each invocation names one of the instruction
 * templates laid out after the ret below, a _patch_* label that is not
 * defined in this view, and a byte count.
 * NOTE(review): the _HOT_PATCH* macros are defined elsewhere; presumably
 * they copy that many template bytes over the named kernel text - confirm.
 */
ENTRY_NP(patch_sse)
_HOT_PATCH_PROLOG
/
/ frstor (%ebx); nop -> fxrstor (%ebx)
/
/* 3 = encoded length of fxrstor (%ebx) */
_HOT_PATCH(_fxrstor_ebx_insn, _patch_fxrstor_ebx, 3)
/
/ lock; xorl $0, (%esp) -> sfence; ret
/
/* 4 = the 3-byte sfence plus the 1-byte ret */
_HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4)
_HOT_PATCH_EPILOG
ret
/*
 * Everything from here to SET_SIZE is template data placed after the ret;
 * it is never reached by falling through.
 */
_fxrstor_ebx_insn: / see ndptrap_frstor()
fxrstor (%ebx)
/* this template is not named by any _HOT_PATCH in this routine */
_ldmxcsr_ebx_insn: / see resume_from_zombie()
ldmxcsr (%ebx)
_sfence_ret_insn: / see membar_producer()
.byte 0xf, 0xae, 0xf8 / [sfence instruction]
ret
SET_SIZE(patch_sse)
/*
 * void patch_sse2(void)
 *
 * Invokes _HOT_PATCH once, naming the lfence/ret template below, the
 * _patch_lfence_ret label (not defined in this view) and a count of 4.
 * NOTE(review): the _HOT_PATCH* macros are defined elsewhere; presumably
 * they copy that many template bytes over the named kernel text - confirm.
 */
ENTRY_NP(patch_sse2)
_HOT_PATCH_PROLOG
/
/ lock; xorl $0, (%esp) -> lfence; ret
/
/* 4 = the 3-byte lfence plus the 1-byte ret */
_HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4)
_HOT_PATCH_EPILOG
ret
/* template data placed after the ret; never reached by falling through */
_lfence_ret_insn: / see membar_consumer()
.byte 0xf, 0xae, 0xe8 / [lfence instruction]
ret
SET_SIZE(patch_sse2)
/*
* Patch lazy fp restore instructions in the trap handler
* to use xrstor instead of frstor
*/
/*
 * void patch_xsave(void)		(i386)
 *
 * Invokes _HOT_PATCH once, naming the xrstor template below, the
 * _patch_xrstor_ebx label (not defined in this view) and a count of 3.
 * NOTE(review): the _HOT_PATCH* macros are defined elsewhere; presumably
 * they copy that many template bytes over the named kernel text - confirm.
 */
ENTRY_NP(patch_xsave)
_HOT_PATCH_PROLOG
/
/ frstor (%ebx); nop -> xrstor (%ebx)
/
/* 3 = length of the hand-encoded xrstor (%ebx) below */
_HOT_PATCH(_xrstor_ebx_insn, _patch_xrstor_ebx, 3)
_HOT_PATCH_EPILOG
ret
/*
 * Template data placed after the ret.  The instruction is emitted as raw
 * bytes: 0x0f 0xae is the opcode and ModRM 0x2b selects /5 with (%ebx).
 */
_xrstor_ebx_insn: / see ndptrap_frstor()
#xrstor (%ebx)
.byte 0x0f, 0xae, 0x2b
SET_SIZE(patch_xsave)
#endif /* __lint */
#endif /* __i386 */
#if defined(__amd64)
#if defined(__lint)

/* lint-only empty C stand-in for the assembly routine below */
void
patch_xsave(void)
{
}

#else	/* __lint */

/*
 * void patch_xsave(void)		(amd64)
 *
 * Patches the lazy fp restore instruction at _patch_xrstorq_rbx in the
 * trap handler so that it uses xrstor instead of fxrstorq:
 *
 *	FXRSTORQ (%rbx);	-> xrstor (%rbx)
 *	hot_patch(_xrstor_rbx_insn, _patch_xrstorq_rbx, 4)
 *
 * The four template bytes are passed to hot_patch_kernel_text() one at
 * a time.  Registers that must survive each call:
 *	%rbx	next kernel text byte to patch
 *	%rbp	next byte of the replacement template
 *	%r15	bytes still to copy
 */
	ENTRY_NP(patch_xsave)
	pushq	%rbx
	pushq	%rbp
	pushq	%r15

	leaq	_patch_xrstorq_rbx(%rip), %rbx
	leaq	_xrstor_rbx_insn(%rip), %rbp
	movq	$4, %r15
1:
	movq	%rbx, %rdi		/* arg 1: patch address */
	movzbq	(%rbp), %rsi		/* arg 2: instruction byte */
	movq	$1, %rdx		/* arg 3: count */
	call	hot_patch_kernel_text
	incq	%rbx
	incq	%rbp
	decq	%r15
	jnz	1b			/* loop until all bytes are written */

	popq	%r15
	popq	%rbp
	popq	%rbx
	ret

	/*
	 * Replacement template, see ndptrap_frstor():
	 * rex.W=1 (0x48) followed by xrstor (%rbx).
	 */
_xrstor_rbx_insn:
	.byte	0x48, 0x0f, 0xae, 0x2b
	SET_SIZE(patch_xsave)

#endif	/* __lint */
#endif /* __amd64 */
/*
* One of these routines is called from any lwp with floating
* point context as part of the prolog of a context switch.
*/
#if defined(__lint)
/* lint-only empty C stand-ins for the context-save assembly routines */
/*ARGSUSED*/
void
xsave_ctxt(void *arg)
{
}

/*ARGSUSED*/
void
fpxsave_ctxt(void *arg)
{
}

/*ARGSUSED*/
void
fpnsave_ctxt(void *arg)
{
}
#else /* __lint */
#if defined(__amd64)
/*
 * void fpxsave_ctxt(void *arg)		(amd64)
 *
 * %rdi = arg, a struct fpu_ctx.  When its flags word equals FPU_EN the
 * flags become FPU_VALID|FPU_EN, the FPU state is stored with FXSAVEQ
 * into the context's register area, the x87 exception pointers are
 * scrubbed and STTS is issued.  For any other flags value the routine
 * returns without doing anything.
 */
	ENTRY_NP(fpxsave_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	FXSAVEQ	(FPU_CTX_FPU_REGS(%rdi))

	/*
	 * On certain AMD processors, the "exception pointers" i.e. the last
	 * instruction pointer, last data pointer, and last opcode
	 * are saved by the fxsave instruction ONLY if the exception summary
	 * bit is set.
	 *
	 * To ensure that we don't leak these values into the next context
	 * on the cpu, we could just issue an fninit here, but that's
	 * rather slow and so we issue an instruction sequence that
	 * clears them more quickly, if a little obscurely.
	 *
	 * The saved status word lives inside the register save area, so it
	 * is addressed relative to FPU_CTX_FPU_REGS rather than relying on
	 * the save area being at offset 0 of the fpu_ctx.
	 */
	btw	$7, FPU_CTX_FPU_REGS+FXSAVE_STATE_FSW(%rdi) /* saved ES bit */
	jnc	0f			/* jump if ES = 0 */
	fnclex				/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)
			/* dummy load changes all exception pointers */
	STTS(%rsi)			/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_ctxt)
/*
 * void xsave_ctxt(void *arg)		(amd64)
 *
 * %rdi = arg, a struct fpu_ctx.  When its flags word equals FPU_EN the
 * flags become FPU_VALID|FPU_EN, the state selected by the context's
 * 64-bit xsave mask is stored with xsave into the context's register
 * area, the x87 exception pointers are scrubbed and STTS is issued.
 * For any other flags value the routine returns without doing anything.
 */
	ENTRY_NP(xsave_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	/*
	 * Setup xsave flags in EDX:EAX
	 */
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	leaq	FPU_CTX_FPU_REGS(%rdi), %rsi	/* %rsi = save area */
	/* xsave (%rsi) */
	.byte	0x0f, 0xae, 0x26

	/*
	 * Some processors store the x87 "exception pointers" only when the
	 * exception summary (ES) bit is set, so clear any pending exception
	 * and do a dummy load to overwrite the pointers before the next
	 * context runs.
	 * TODO: does it apply to any machine that uses xsave?
	 *
	 * The saved status word is read through %rsi, the save-area pointer
	 * handed to xsave, so the test does not depend on the save area
	 * being at offset 0 of the fpu_ctx.
	 */
	btw	$7, FXSAVE_STATE_FSW(%rsi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex				/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)
			/* dummy load changes all exception pointers */
	STTS(%rsi)			/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsave_ctxt)
#elif defined(__i386)
/*
 * void fpnsave_ctxt(void *arg)		(i386)
 *
 * arg (first stack argument) is a struct fpu_ctx.  When its flags word
 * equals FPU_EN the flags become FPU_VALID|FPU_EN, the x87 state is
 * stored with fnsave into the context's register area and STTS is
 * issued.  For any other flags value the routine just returns.
 */
	ENTRY_NP(fpnsave_ctxt)
	movl	4(%esp), %eax		/* %eax = the struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
	fnsave	FPU_CTX_FPU_REGS(%eax)
			/* fnsave leaves the x87 unit reinitialized */
	STTS(%edx)			/* next fpu touch will trap */
1:	rep;	ret	/* 2 byte return because this is a branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpnsave_ctxt)
/*
 * void fpxsave_ctxt(void *arg)		(i386)
 *
 * arg (first stack argument) is a struct fpu_ctx.  When its flags word
 * equals FPU_EN the flags become FPU_VALID|FPU_EN, the FPU state is
 * stored with fxsave into the context's register area, the x87 exception
 * pointers are scrubbed and STTS is issued.  For any other flags value
 * the routine returns without doing anything.
 */
	ENTRY_NP(fpxsave_ctxt)
	movl	4(%esp), %eax		/* a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
	fxsave	FPU_CTX_FPU_REGS(%eax)

	/*
	 * Some processors store the x87 "exception pointers" only when the
	 * exception summary (ES) bit is set, so clear any pending exception
	 * and do a dummy load to overwrite the pointers before the next
	 * context runs.
	 *
	 * The saved status word lives inside the register save area, so it
	 * is addressed relative to FPU_CTX_FPU_REGS rather than relying on
	 * the save area being at offset 0 of the fpu_ctx.
	 */
	btw	$7, FPU_CTX_FPU_REGS+FXSAVE_STATE_FSW(%eax) /* saved ES bit */
	jnc	0f			/* jump if ES = 0 */
	fnclex				/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const
			/* dummy load changes all exception pointers */
	STTS(%edx)			/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_ctxt)
/*
 * void xsave_ctxt(void *arg)		(i386)
 *
 * arg (first stack argument) is a struct fpu_ctx.  When its flags word
 * equals FPU_EN the flags become FPU_VALID|FPU_EN, the state selected by
 * the context's 64-bit xsave mask is stored with xsave into the
 * context's register area, the x87 exception pointers are scrubbed and
 * STTS is issued.  For any other flags value the routine just returns.
 */
	ENTRY_NP(xsave_ctxt)
	movl	4(%esp), %ecx		/* %ecx = the struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%ecx)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx)

	/* xsave takes its feature mask in %edx:%eax */
	movl	FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx
	leal	FPU_CTX_FPU_REGS(%ecx), %ecx	/* %ecx = save area */
	/* xsave (%ecx) */
	.byte	0x0f, 0xae, 0x21

	/*
	 * Scrub the x87 "exception pointers" so they do not leak into the
	 * next context: clear a pending exception if the saved status word
	 * has ES set, then perform a dummy load.
	 * TODO: does it apply to any machine that uses xsave?
	 */
	btw	$7, FXSAVE_STATE_FSW(%ecx)	/* saved ES bit -> CF */
	jnc	0f				/* ES = 0: skip fnclex */
	fnclex
0:	ffree	%st(7)		/* free a slot so the load cannot overflow */
	fildl	.fpzero_const
				/* dummy load rewrites the pointers */
	STTS(%edx)		/* next fpu touch will trap */
1:	ret
	SET_SIZE(xsave_ctxt)
#endif /* __i386 */
/*
 * Zero constant used as the operand of the "fildl .fpzero_const" dummy
 * loads in the *_ctxt routines above.  Eight zero bytes are emitted at
 * 8-byte alignment; fildl itself reads a 32-bit integer.
 */
.align 8
.fpzero_const:
.4byte 0x0
.4byte 0x0
#endif /* __lint */
#if defined(__lint)
/* lint-only empty C stand-ins for the state-save assembly routines */
/*ARGSUSED*/
void
fpsave(struct fnsave_state *f)
{
}

/*ARGSUSED*/
void
fpxsave(struct fxsave_state *f)
{
}

/*ARGSUSED*/
void
xsave(struct xsave_state *f, uint64_t m)
{
}
#else /* __lint */
#if defined(__amd64)
/*
 * void fpxsave(struct fxsave_state *f)		(amd64)
 *
 * %rdi = f.  Stores the FPU state at f with FXSAVEQ, reinitializes the
 * x87 unit and finally issues STTS so the FPU is left disabled.
 */
	ENTRY_NP(fpxsave)
	CLTS
	FXSAVEQ	((%rdi))
	fninit				/* reset exceptions and x87 tags */
	STTS(%rdi)			/* TS set in %cr0: FPU disabled */
	ret
	SET_SIZE(fpxsave)
/*
 * void xsave(struct xsave_state *f, uint64_t m)	(amd64)
 *
 * %rdi = f, %rsi = m.  Splits the 64-bit mask m into %edx:%eax, stores
 * the selected state at f with xsave, reinitializes the x87 unit and
 * finally issues STTS so the FPU is left disabled.
 */
	ENTRY_NP(xsave)
	CLTS
	movl	%esi, %eax		/* %eax = low 32 bits of m */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = high 32 bits of m */
	/* xsave (%rdi) */
	.byte	0x0f, 0xae, 0x27

	fninit				/* reset exceptions and x87 tags */
	STTS(%rdi)			/* TS set in %cr0: FPU disabled */
	ret
	SET_SIZE(xsave)
#elif defined(__i386)
/*
 * void fpsave(struct fnsave_state *f)		(i386)
 *
 * Stores the x87 state at f (first stack argument) with fnsave, then
 * issues STTS so the FPU is left disabled.
 */
	ENTRY_NP(fpsave)
	CLTS
	movl	4(%esp), %eax		/* %eax = f */
	fnsave	(%eax)
	STTS(%eax)			/* TS set in %cr0: FPU disabled */
	ret
	SET_SIZE(fpsave)
/*
 * void fpxsave(struct fxsave_state *f)		(i386)
 *
 * Stores the FPU state at f (first stack argument) with fxsave,
 * reinitializes the x87 unit and finally issues STTS so the FPU is left
 * disabled.
 */
	ENTRY_NP(fpxsave)
	CLTS
	movl	4(%esp), %eax		/* %eax = f */
	fxsave	(%eax)
	fninit				/* reset exceptions and x87 tags */
	STTS(%eax)			/* TS set in %cr0: FPU disabled */
	ret
	SET_SIZE(fpxsave)
/*
 * void xsave(struct xsave_state *f, uint64_t m)	(i386)
 *
 * Loads f into %ecx and the two halves of the 64-bit mask m into
 * %edx:%eax from the stack, stores the selected state at f with xsave,
 * reinitializes the x87 unit and finally issues STTS so the FPU is left
 * disabled.
 */
	ENTRY_NP(xsave)
	CLTS
	movl	4(%esp), %ecx		/* %ecx = f */
	movl	8(%esp), %eax		/* %eax = low 32 bits of m */
	movl	12(%esp), %edx		/* %edx = high 32 bits of m */
	/* xsave (%ecx) */
	.byte	0x0f, 0xae, 0x21

	fninit				/* reset exceptions and x87 tags */
	STTS(%eax)			/* TS set in %cr0: FPU disabled */
	ret
	SET_SIZE(xsave)
#endif /* __i386 */
#endif /* __lint */
#if defined(__lint)
/* lint-only empty C stand-ins for the state-restore assembly routines */
/*ARGSUSED*/
void
fprestore(struct fnsave_state *f)
{
}

/*ARGSUSED*/
void
fpxrestore(struct fxsave_state *f)
{
}

/*ARGSUSED*/
void
xrestore(struct xsave_state *f, uint64_t m)
{
}
#else /* __lint */
#if defined(__amd64)
/*
 * void fpxrestore(struct fxsave_state *f)	(amd64)
 *
 * %rdi = f.  Issues CLTS and then reloads the FPU state from f with
 * FXRSTORQ.
 */
	ENTRY_NP(fpxrestore)
	CLTS
	FXRSTORQ	((%rdi))
	ret
	SET_SIZE(fpxrestore)
/*
 * void xrestore(struct xsave_state *f, uint64_t m)	(amd64)
 *
 * %rdi = f, %rsi = m.  Issues CLTS, splits the 64-bit mask m into
 * %edx:%eax and reloads the selected state from f with xrstor.
 */
	ENTRY_NP(xrestore)
	CLTS
	movl	%esi, %eax		/* %eax = low 32 bits of m */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = high 32 bits of m */
	/* xrstor (%rdi) */
	.byte	0x0f, 0xae, 0x2f
	ret
	SET_SIZE(xrestore)
#elif defined(__i386)
/*
 * void fprestore(struct fnsave_state *f)	(i386)
 *
 * Issues CLTS and then reloads the x87 state from f (first stack
 * argument) with frstor.
 */
	ENTRY_NP(fprestore)
	CLTS
	movl	4(%esp), %eax		/* %eax = f */
	frstor	(%eax)
	ret
	SET_SIZE(fprestore)
/*
 * void fpxrestore(struct fxsave_state *f)	(i386)
 *
 * Issues CLTS and then reloads the FPU state from f (first stack
 * argument) with fxrstor.
 */
	ENTRY_NP(fpxrestore)
	CLTS
	movl	4(%esp), %eax		/* %eax = f */
	fxrstor	(%eax)
	ret
	SET_SIZE(fpxrestore)
/*
 * void xrestore(struct xsave_state *f, uint64_t m)	(i386)
 *
 * Issues CLTS, loads f into %ecx and the two halves of the 64-bit mask m
 * into %edx:%eax from the stack, and reloads the selected state from f
 * with xrstor.
 */
	ENTRY_NP(xrestore)
	CLTS
	movl	4(%esp), %ecx		/* %ecx = f */
	movl	8(%esp), %eax		/* %eax = low 32 bits of m */
	movl	12(%esp), %edx		/* %edx = high 32 bits of m */
	/* xrstor (%ecx) */
	.byte	0x0f, 0xae, 0x29
	ret
	SET_SIZE(xrestore)
#endif /* __i386 */
#endif /* __lint */
/*
* Disable the floating point unit.
*/
#if defined(__lint)

/* lint-only empty C stand-in for the assembly routine below */
void
fpdisable(void)
{
}

#else	/* __lint */

/*
 * void fpdisable(void)
 *
 * Issues STTS and returns.  The register named in the STTS invocation is
 * a caller-saved one on each architecture.
 */
#if defined(__amd64)

	ENTRY_NP(fpdisable)
	STTS(%rdi)			/* TS set in %cr0: FPU disabled */
	ret
	SET_SIZE(fpdisable)

#elif defined(__i386)

	ENTRY_NP(fpdisable)
	STTS(%eax)
	ret
	SET_SIZE(fpdisable)

#endif	/* __i386 */
#endif	/* __lint */
/*
* Initialize the fpu hardware.
*/
#if defined(__lint)

/* lint-only empty C stand-in for the assembly routine below */
void
fpinit(void)
{
}

#else	/* __lint */

/*
 * void fpinit(void)
 *
 * Issues CLTS and loads a clean initial FPU state chosen by the value of
 * fp_save_mech:
 *	FP_XSAVE	xrstor from avx_initial, with a feature mask of
 *			XFEATURE_LEGACY_FP | XFEATURE_SSE plus
 *			XFEATURE_AVX when bit X86FSET_AVX of
 *			x86_featureset is set
 *	FP_FXSAVE	fxrstor from sse_initial (on amd64 this is the
 *			path for every value other than FP_XSAVE)
 *	otherwise	(i386 only) fninit, then frstor from x87_initial
 */
#if defined(__amd64)

	ENTRY_NP(fpinit)
	CLTS
	cmpl	$FP_XSAVE, fp_save_mech
	je	1f

	/* fxsave mechanism */
	leaq	sse_initial(%rip), %rax
	FXRSTORQ	((%rax))	/* clean initial state */
	ret

1:	/* xsave mechanism: build the feature mask in %edx:%eax */
	leaq	avx_initial(%rip), %rcx
	xorl	%edx, %edx		/* high half of the mask is 0 */
	movl	$XFEATURE_AVX, %eax
	bt	$X86FSET_AVX, x86_featureset
	cmovael	%edx, %eax		/* feature bit clear: drop AVX */
	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
	/* xrstor (%rcx) */
	.byte	0x0f, 0xae, 0x29	/* clean initial state */
	ret
	SET_SIZE(fpinit)

#elif defined(__i386)

	ENTRY_NP(fpinit)
	CLTS
	cmpl	$FP_FXSAVE, fp_save_mech
	je	1f
	cmpl	$FP_XSAVE, fp_save_mech
	je	2f

	/* fnsave mechanism */
	fninit
	movl	$x87_initial, %eax
	frstor	(%eax)			/* clean initial state */
	ret

1:	/* fxsave mechanism */
	movl	$sse_initial, %eax
	fxrstor	(%eax)			/* clean initial state */
	ret

2:	/* xsave mechanism: build the feature mask in %edx:%eax */
	movl	$avx_initial, %ecx
	xorl	%edx, %edx		/* high half of the mask is 0 */
	movl	$XFEATURE_AVX, %eax
	bt	$X86FSET_AVX, x86_featureset
	cmovael	%edx, %eax		/* feature bit clear: drop AVX */
	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
	/* xrstor (%ecx) */
	.byte	0x0f, 0xae, 0x29	/* clean initial state */
	ret
	SET_SIZE(fpinit)

#endif	/* __i386 */
#endif	/* __lint */
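/*
* Clears FPU exception state.
* fperr_reset() returns the x87 status word and fpxerr_reset() returns
* the MXCSR value, each as read before the exceptions are cleared.
*/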
#if defined(__lint)
/* lint-only C stand-ins for the exception-reset assembly routines */
uint32_t
fperr_reset(void)
{
	return (0);
}

uint32_t
fpxerr_reset(void)
{
	return (0);
}
#else /* __lint */
#if defined(__amd64)
/*
 * uint32_t fperr_reset(void)		(amd64)
 *
 * Returns the x87 status word, zero-extended in %eax, as it was before
 * the fnclex that clears the x87 exception flags.
 */
	ENTRY_NP(fperr_reset)
	CLTS
	xorl	%eax, %eax		/* upper bits of the result are 0 */
	fnstsw	%ax			/* %ax = status word */
	fnclex				/* now clear the exceptions */
	ret
	SET_SIZE(fperr_reset)
/*
 * uint32_t fpxerr_reset(void)		(amd64)
 *
 * Returns the MXCSR value in %eax as it was on entry, then reloads MXCSR
 * with the SSE_MXCSR_EFLAGS bits cleared.  A 16-byte stack temporary
 * inside a frame-pointer frame holds the value in between.
 */
	ENTRY_NP(fpxerr_reset)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* scratch space for MXCSR */
	CLTS
	stmxcsr	(%rsp)
	movl	(%rsp), %eax		/* return value: MXCSR as read */
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
	ldmxcsr	(%rsp)			/* exception flags now clear */
	leave
	ret
	SET_SIZE(fpxerr_reset)
#elif defined(__i386)
/*
 * uint32_t fperr_reset(void)		(i386)
 *
 * Returns the x87 status word, zero-extended in %eax, as it was before
 * the fnclex that clears the x87 exception flags.
 */
	ENTRY_NP(fperr_reset)
	CLTS
	xorl	%eax, %eax		/* upper bits of the result are 0 */
	fnstsw	%ax			/* %ax = status word */
	fnclex				/* now clear the exceptions */
	ret
	SET_SIZE(fperr_reset)
/*
 * uint32_t fpxerr_reset(void)		(i386)
 *
 * Returns the MXCSR value in %eax as it was on entry, then reloads MXCSR
 * with the SSE_MXCSR_EFLAGS bits cleared.  A 4-byte stack temporary
 * holds the value in between.
 */
	ENTRY_NP(fpxerr_reset)
	CLTS
	subl	$4, %esp		/* scratch space for MXCSR */
	stmxcsr	(%esp)
	movl	(%esp), %eax		/* return value: MXCSR as read */
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%esp)
	ldmxcsr	(%esp)			/* exception flags now clear */
	addl	$4, %esp
	ret
	SET_SIZE(fpxerr_reset)
#endif /* __i386 */
#endif /* __lint */
#if defined(__lint)

/* lint-only C stand-in for the assembly routine below */
uint32_t
fpgetcwsw(void)
{
	return (0);
}

#else	/* __lint */

/*
 * uint32_t fpgetcwsw(void)
 *
 * Returns the x87 status word in the low 16 bits of %eax and the x87
 * control word in the high 16 bits.  Both are stored next to each other
 * in a stack temporary and read back as one 32-bit value.
 */
#if defined(__amd64)

	ENTRY_NP(fpgetcwsw)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* scratch space */
	CLTS
	fnstsw	(%rsp)			/* bytes 0-1: status word */
	fnstcw	2(%rsp)			/* bytes 2-3: control word */
	movl	(%rsp), %eax		/* fetch both at once */
	leave
	ret
	SET_SIZE(fpgetcwsw)

#elif defined(__i386)

	ENTRY_NP(fpgetcwsw)
	CLTS
	subl	$4, %esp		/* scratch space */
	fnstsw	(%esp)			/* bytes 0-1: status word */
	fnstcw	2(%esp)			/* bytes 2-3: control word */
	movl	(%esp), %eax		/* fetch both at once */
	addl	$4, %esp
	ret
	SET_SIZE(fpgetcwsw)

#endif	/* __i386 */
#endif	/* __lint */
/*
* Returns the MXCSR register.
*/
#if defined(__lint)

/* lint-only C stand-in for the assembly routine below */
uint32_t
fpgetmxcsr(void)
{
	return (0);
}

#else	/* __lint */

/*
 * uint32_t fpgetmxcsr(void)
 *
 * Stores MXCSR into a stack temporary with stmxcsr and returns that
 * value in %eax.
 */
#if defined(__amd64)

	ENTRY_NP(fpgetmxcsr)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* scratch space */
	CLTS
	stmxcsr	(%rsp)
	movl	(%rsp), %eax		/* return value: MXCSR */
	leave
	ret
	SET_SIZE(fpgetmxcsr)

#elif defined(__i386)

	ENTRY_NP(fpgetmxcsr)
	CLTS
	subl	$4, %esp		/* scratch space */
	stmxcsr	(%esp)
	movl	(%esp), %eax		/* return value: MXCSR */
	addl	$4, %esp
	ret
	SET_SIZE(fpgetmxcsr)

#endif	/* __i386 */
#endif	/* __lint */