float.s revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
/* All Rights Reserved */
/* Copyright (c) 1987, 1988 Microsoft Corporation */
/* All Rights Reserved */
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/asm_linkage.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>
#if defined(__lint)
#else
#include "assym.h"
#endif
#if defined(__lint)
int fpu_exists = 1;
int fpu_ignored = 0;
int use_sse_pagecopy = 0;
int use_sse_pagezero = 0;
int use_sse_copy = 0;
#if defined(__i386)
int fpu_pentium_fdivbug = 0;
#endif
#else /* __lint */
/*
 * If fpu_exists is non-zero, fpu_probe will attempt to use any
 * hardware FPU (subject to other constraints, see below). If
 * fpu_exists is zero, fpu_probe will report that there is no
 * FPU even if there is one.
 *
 * NOTE(review): the symbol labels for the data words in this section
 * are not present in this view of the file.  Judging by the lint
 * declarations above and by the initial values, the first word is
 * presumably fpu_exists and the second presumably fp_kind -- confirm
 * against the complete source.
 */
.long 1
.long FP_387 /* FP_NO, FP_287, FP_387, etc. */
/*
 * The variable fpu_ignored is provided to allow other code to
 * determine whether emulation is being done because there is no FPU
 * hardware or because the FPU is deliberately being ignored
 * (fpu_probe stores 1 here on its ignore_fpu path).
 */
.long 0
/*
 * Used by ppcopy, ppzero, and xcopyin to determine whether or not
 * to use the SSE-based routines
 *
 * NOTE(review): presumably use_sse_pagecopy, use_sse_pagezero and
 * use_sse_copy, in the order of the lint declarations -- confirm.
 */
.long 0
.long 0
.long 0
#if defined(__i386)
/*
 * The variable fpu_pentium_fdivbug is provided to allow other code to
 * determine whether the system contains a Pentium with the FDIV
 * problem.
 */
.long 0
/*
 * The following constants are used for detecting the Pentium
 * divide bug.
 *
 * NOTE(review): no constants follow the alignment directive in this
 * view; they appear to be missing -- confirm against the complete
 * source.
 */
.align 4
#endif /* __i386 */
#endif /* __lint */
/*
 * FPU probe - check if we have any FP chip present by trying to do a reset.
 * If that succeeds, differentiate via cr0. Called from autoconf.
 *
 * On the i386 paths visible below the routine always ends by loading
 * %cr0 from %edx at "cont":
 *   - mathchip: CR0_TS and CR0_EM are cleared, CR0_MP and CR0_NE are set.
 *   - ignore_fpu / no_fpu_hw: CR0_MP is cleared, fp_kind becomes FP_NO
 *     and fpu_exists is zeroed; ignore_fpu also sets fpu_ignored to 1.
 *
 * NOTE(review): this view of the routine is incomplete.  The amd64
 * variant shows only two instructions followed by comments, and the
 * i386 variant has no visible entry point, no visible preprocessor
 * branch separating it from the amd64 text, and no visible jumps to
 * is287, ignore_fpu or no_fpu_hw.  Confirm against the complete source
 * before changing any code here.
 */
#if defined(__lint)
/*ARGSUSED*/
void
fpu_probe(void)
{}
#else /* __lint */
#if defined(__amd64)
clts /* clear task switched bit in CR0 */
fninit /* initialize chip */
/*
 * Ignore the FPU if fpu_exists == 0
 */
/*
 * we have a chip of some sort; use cr0 to differentiate
 */
/*
 * Tell the processor what we're doing via %cr4
 */
/*
 * make other CPUs share the same cr4 settings
 */
/*
 * extract the MXCSR_MASK field from our first fxsave
 */
1:
/*
 * We have SSE and SSE2 so enable the extensions for
 * non-temporal copies and stores.
 */
/*
 * Do not use the FPU at all
 */
/*
 * No FPU hardware present
 */
/*
 * Disable the XMM-related gorp too, in case the BIOS set them
 */
done:
/
/
/
/
/
/
/
/* NOTE(review): the branch that consumes this comparison is not visible. */
cmpl $0, fpu_exists
/
/
/
/
/
/
/
/
/ (the default mask set in fpinit() disables them)
/ - determine the mxcsr_mask so we can avoid setting reserved bits
/
/*
 * SSE path: record __FP_SSE in fp_kind, point fpsave_begin at
 * fpxsave_begin, hot patch the SSE call sites via patch_sse, and set
 * CR4_OSFXSR and CR4_OSXMMEXCPT in %cr4.
 */
movl $__FP_SSE, fp_kind
movl $fpxsave_begin, %eax
movl %eax, fpsave_begin
call patch_sse
mov %cr4, %eax
orl $_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
mov %eax, %cr4
/
/ make other CPUs share the same cr4 settings
/
orl $_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value
/
/ extract the MXCSR_MASK field from our first fxsave
/
/*
 * Reserve FXSAVE_STATE_SIZE + XMM_ALIGN bytes of stack, round the
 * pointer up to an XMM_ALIGN boundary inside that area, zero the
 * MXCSR_MASK field, fxsave into the area and read the field back into
 * %eax before releasing the stack space again.
 */
subl $FXSAVE_STATE_SIZE + XMM_ALIGN, %esp
movl %esp, %eax
addl $XMM_ALIGN, %eax
andl $_BITNOT(XMM_ALIGN-1), %eax /* 16-byte alignment */
movl $0, FXSAVE_STATE_MXCSR_MASK(%eax)
fxsave (%eax)
movl FXSAVE_STATE_MXCSR_MASK(%eax), %eax
addl $FXSAVE_STATE_SIZE + XMM_ALIGN, %esp
/* A zero mask leaves sse_mxcsr_mask at its existing value. */
cmpl $0, %eax
je 1f / default mask value set in fpu.c
movl %eax, sse_mxcsr_mask / override mask set here
1: testl $X86_SSE2, x86_feature / can we do SSE2?
je mathchip
/
/ aha .. we have an SSE2-capable chip
/
/ - enable pagezero and pagecopy using non-temporal instructions
/ - hot patch membar_consumer() to use lfence instead of lock
/
movl $1, use_sse_pagecopy / will now call hwblkpagecopy
movl $1, use_sse_pagezero / will now call hwblkclr
movl $1, use_sse_copy
call patch_sse2
jmp mathchip
/
/ No 387; we must have an 80287.
/
is287:
/*
 * When __GNUC_AS__ is defined, fsetpm is not emitted and fp_kind is
 * set to FP_NO instead of FP_287; execution still falls through to
 * mathchip in both cases.
 */
#if !defined(__GNUC_AS__)
fsetpm / set the 80287 into protected mode
movl $FP_287, fp_kind / we have a 287 chip
#else
movl $FP_NO, fp_kind / maybe just explode here instead?
#endif
/
/ We have either a 287, 387, 486 or P5.
/ Setup cr0 to reflect the FPU hw type.
/
mathchip:
movl %cr0, %edx
andl $_BITNOT(CR0_TS|CR0_EM), %edx /* clear emulate math bit */
orl $_CONST(CR0_MP|CR0_NE), %edx
jmp cont
/ Do not use the FPU
/* ignore_fpu falls through into no_fpu_hw after flagging fpu_ignored. */
ignore_fpu:
movl $1, fpu_ignored
/ No FP hw present.
no_fpu_hw:
movl %cr0, %edx
andl $_BITNOT(CR0_MP), %edx /* clear math chip present */
movl $FP_NO, fp_kind / signify that there is no FPU
movl $0, fpu_exists / no FPU present
/* Common exit: %edx holds the new %cr0 image built above. */
cont:
movl %edx, %cr0 / set machine status word
ret
SET_SIZE(fpu_probe)
/*
 * HOT_PATCH(srcaddr, dstaddr, size)
 *
 * Copy `size' bytes of replacement code from srcaddr over the code at
 * dstaddr, one byte per call to hot_patch_kernel_text().  Each call is
 * handed three stacked arguments: the destination address, the source
 * byte zero-extended to 32 bits, and the constant 1; the 12 bytes of
 * arguments are popped after the call.
 *
 * Registers used: %esi = source cursor, %edi = destination cursor,
 * %ebx = bytes remaining, %eax = current byte.  The count is only
 * checked after the loop body has run, so size must be at least 1.
 * The expansion defines numeric local label 0.
 */
#define	HOT_PATCH(srcaddr, dstaddr, size)	\
	movl	$srcaddr, %esi;			\
	movl	$dstaddr, %edi;			\
	movl	$size, %ebx;			\
0:	pushl	$1;				\
	movzbl	(%esi), %eax;			\
	pushl	%eax;				\
	pushl	%edi;				\
	call	hot_patch_kernel_text;		\
	addl	$12, %esp;			\
	incl	%edi;				\
	incl	%esi;				\
	decl	%ebx;				\
	testl	%ebx, %ebx;			\
	jne	0b
/*
 * patch_sse (i386 only)
 *
 * Hot paths in the kernel are assembled without the SSE-era
 * instructions; once that feature has been detected, this routine
 * patches them to use fxrstor, ldmxcsr and sfence.  The replacement
 * byte sequences are the instruction templates that follow the ret
 * below, and each one is copied into place with HOT_PATCH.
 *
 * HOT_PATCH uses %ebx, %esi and %edi, so they are saved in a standard
 * %ebp frame and restored before returning.
 */
	ENTRY_NP(patch_sse)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* frstor (%eax); nop  ->  fxrstor (%eax) */
	HOT_PATCH(_fxrstor_eax_insn, _patch_fxrstor_eax, 3)

	/* nop; nop; nop  ->  ldmxcsr (%ebx) */
	HOT_PATCH(_ldmxcsr_ebx_insn, _patch_ldmxcsr_ebx, 3)

	/* lock; xorl $0, (%esp)  ->  sfence; ret */
	HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4)

	popl	%edi
	popl	%esi
	popl	%ebx
	leave				/* %esp = %ebp, then pop %ebp */
	ret

	/* Replacement text; never executed in place. */
_fxrstor_eax_insn:			/* see ndptrap_frstor() */
	fxrstor	(%eax)
_ldmxcsr_ebx_insn:			/* see resume_from_zombie() */
	ldmxcsr	(%ebx)
_sfence_ret_insn:			/* see membar_producer() */
	.byte	0x0f, 0xae, 0xf8	/* [sfence instruction] */
	ret
	SET_SIZE(patch_sse)
/*
 * patch_sse2 (i386 only)
 *
 * Counterpart of patch_sse for code that depends on the SSE2
 * extensions: the four-byte template at _lfence_ret_insn is copied
 * over _patch_lfence_ret with HOT_PATCH.
 *
 * HOT_PATCH uses %ebx, %esi and %edi, so they are saved in a standard
 * %ebp frame and restored before returning.
 */
	ENTRY_NP(patch_sse2)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* lock; xorl $0, (%esp)  ->  lfence; ret */
	HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4)

	popl	%edi
	popl	%esi
	popl	%ebx
	leave				/* %esp = %ebp, then pop %ebp */
	ret

	/* Replacement text; never executed in place. */
_lfence_ret_insn:			/* see membar_consumer() */
	.byte	0x0f, 0xae, 0xe8	/* [lfence instruction] */
	ret
	SET_SIZE(patch_sse2)
#endif /* __i386 */
#endif /* __lint */
/*
 * fpnsave_begin(arg) / fpxsave_begin(arg)
 *
 * One of these routines is called from any lwp with floating
 * point context as part of the prolog of a context switch; the
 * routine starts the floating point state save operation.
 * The completion of the save is forced by an fwait just before
 * we truly switch contexts.
 *
 * arg is a struct fpu_ctx * (%rdi on amd64, 4(%esp) on i386).  The
 * state is saved only when the context's flags word is exactly FPU_EN;
 * otherwise the routine returns without touching the FPU.  The save
 * area lives FPU_CTX_FPU_REGS bytes into the context.  fpnsave_begin
 * saves with fnsave; fpxsave_begin saves with fxsave and then clears
 * pending x87 exceptions with fnclex.
 *
 * The offset must be added as an immediate ($FPU_CTX_FPU_REGS).  In
 * AT&T syntax a bare constant operand is an absolute memory reference,
 * which would add whatever is stored at that address to the pointer.
 */
#if defined(__lint)

/*ARGSUSED*/
void
fpnsave_begin(void *arg)
{}

/*ARGSUSED*/
void
fpxsave_begin(void *arg)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpxsave_begin)
	movl	FPU_CTX_FPU_FLAGS(%rdi), %edx
	cmpl	$FPU_EN, %edx
	jne	1f			/* nothing to save for this ctx */
#if FPU_CTX_FPU_REGS != 0
	addq	$FPU_CTX_FPU_REGS, %rdi	/* step to the register save area */
#endif
	fxsave	(%rdi)
	fnclex				/* clear pending x87 exceptions */
1:	ret
	SET_SIZE(fpxsave_begin)

#elif defined(__i386)

	ENTRY_NP(fpnsave_begin)
	mov	4(%esp), %eax		/* a struct fpu_ctx * */
	mov	FPU_CTX_FPU_FLAGS(%eax), %edx
	cmpl	$FPU_EN, %edx
	jne	1f			/* nothing to save for this ctx */
#if FPU_CTX_FPU_REGS != 0
	addl	$FPU_CTX_FPU_REGS, %eax	/* step to the register save area */
#endif
	fnsave	(%eax)
1:	ret
	SET_SIZE(fpnsave_begin)

	ENTRY_NP(fpxsave_begin)
	mov	4(%esp), %eax		/* a struct fpu_ctx * */
	mov	FPU_CTX_FPU_FLAGS(%eax), %edx
	cmpl	$FPU_EN, %edx
	jne	1f			/* nothing to save for this ctx */
#if FPU_CTX_FPU_REGS != 0
	addl	$FPU_CTX_FPU_REGS, %eax	/* step to the register save area */
#endif
	fxsave	(%eax)
	fnclex				/* clear pending x87 exceptions */
1:	ret
	SET_SIZE(fpxsave_begin)

#endif	/* __i386 */
#endif	/* __lint */
/*
 * fpsave(f) / fpxsave(f)
 *
 * Store the current FPU state into the save area f (%rdi on amd64,
 * 4(%esp) on i386) and return with CR0_TS set, i.e. with the FPU
 * disabled.  fpsave stores with fnsave.  fpxsave stores with fxsave,
 * clears pending x87 exceptions and then issues fninit so that, as
 * with fnsave, the x87 tag words are reinitialized afterwards.
 */
#if defined(__lint)

/*ARGSUSED*/
void
fpsave(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxsave(struct fxsave_state *f)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpxsave)
	clts				/* CR0_TS off: FPU usable */
	fxsave	(%rdi)
	fnclex				/* drop pending x87 exceptions */
	fwait				/* let the save finish */
	fninit				/* fnsave emulation: reset x87 tags */
	movq	%cr0, %rax
	orq	$CR0_TS, %rax
	movq	%rax, %cr0		/* CR0_TS on: FPU disabled again */
	ret
	SET_SIZE(fpxsave)

#elif defined(__i386)

	ENTRY_NP(fpsave)
	clts				/* CR0_TS off: FPU usable */
	movl	4(%esp), %eax		/* %eax = save area */
	fnsave	(%eax)
	fwait				/* let the save finish */
	movl	%cr0, %eax
	orl	$CR0_TS, %eax
	movl	%eax, %cr0		/* CR0_TS on: FPU disabled again */
	ret
	SET_SIZE(fpsave)

	ENTRY_NP(fpxsave)
	clts				/* CR0_TS off: FPU usable */
	movl	4(%esp), %eax		/* %eax = save area */
	fxsave	(%eax)
	fnclex				/* drop pending x87 exceptions */
	fwait				/* let the save finish */
	fninit				/* fnsave emulation: reset x87 tags */
	movl	%cr0, %eax
	orl	$CR0_TS, %eax
	movl	%eax, %cr0		/* CR0_TS on: FPU disabled again */
	ret
	SET_SIZE(fpxsave)

#endif	/* __i386 */
#endif	/* __lint */
/*
 * fprestore(f) / fpxrestore(f)
 *
 * Clear CR0_TS and reload the FPU state from the save area f (%rdi on
 * amd64, 4(%esp) on i386).  fprestore reloads with frstor, fpxrestore
 * with fxrstor.  CR0_TS is left clear on return.
 */
#if defined(__lint)

/*ARGSUSED*/
void
fprestore(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxrestore(struct fxsave_state *f)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpxrestore)
	clts				/* CR0_TS off: FPU usable */
	fxrstor	(%rdi)
	ret
	SET_SIZE(fpxrestore)

#elif defined(__i386)

	ENTRY_NP(fprestore)
	clts				/* CR0_TS off: FPU usable */
	movl	4(%esp), %eax		/* %eax = area to restore from */
	frstor	(%eax)
	ret
	SET_SIZE(fprestore)

	ENTRY_NP(fpxrestore)
	clts				/* CR0_TS off: FPU usable */
	movl	4(%esp), %eax		/* %eax = area to restore from */
	fxrstor	(%eax)
	ret
	SET_SIZE(fpxrestore)

#endif	/* __i386 */
#endif	/* __lint */
/*
 * fpdisable()
 *
 * Disable the floating point unit by setting CR0_TS.  All other bits
 * of %cr0 are written back unchanged.
 */
#if defined(__lint)

void
fpdisable(void)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpdisable)
	movq	%cr0, %rax
	orq	$CR0_TS, %rax
	movq	%rax, %cr0		/* CR0_TS on: FPU disabled */
	ret
	SET_SIZE(fpdisable)

#elif defined(__i386)

	ENTRY_NP(fpdisable)
	movl	%cr0, %eax
	orl	$CR0_TS, %eax
	movl	%eax, %cr0		/* CR0_TS on: FPU disabled */
	ret
	SET_SIZE(fpdisable)

#endif	/* __i386 */
#endif	/* __lint */
/*
 * fpinit()
 *
 * Initialize the fpu hardware: clear CR0_TS and load a clean initial
 * state image.  amd64 always loads sse_initial with fxrstor.  i386
 * does the same when fp_kind is __FP_SSE; for any other fp_kind it
 * issues fninit and loads x87_initial with frstor.
 */
#if defined(__lint)

void
fpinit(void)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpinit)
	clts				/* CR0_TS off: FPU usable */
	leaq	sse_initial(%rip), %rax
	fxrstor	(%rax)			/* clean initial state */
	ret
	SET_SIZE(fpinit)

#elif defined(__i386)

	ENTRY_NP(fpinit)
	clts				/* CR0_TS off: FPU usable */
	cmpl	$__FP_SSE, fp_kind
	jne	1f			/* not SSE: use the x87 image */
	movl	$sse_initial, %eax
	fxrstor	(%eax)			/* clean initial state */
	ret
1:
	fninit				/* reset the chip first */
	movl	$x87_initial, %eax
	frstor	(%eax)			/* clean initial state */
	ret
	SET_SIZE(fpinit)

#endif	/* __i386 */
#endif	/* __lint */
/*
 * fperr_reset() / fpxerr_reset()
 *
 * Clear FPU exception state and return the status as it was before
 * clearing.  Both clear CR0_TS first.
 *
 * fperr_reset returns the x87 status word, zero-extended, and then
 * clears the x87 exceptions with fnclex.
 *
 * fpxerr_reset returns the MXCSR value and reloads MXCSR with the
 * SSE_MXCSR_EFLAGS bits cleared, using a stack temporary.
 */
#if defined(__lint)

uint32_t
fperr_reset(void)
{
	return (0);
}

uint32_t
fpxerr_reset(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fperr_reset)
	xorl	%eax, %eax		/* upper bits of the result are 0 */
	clts				/* CR0_TS off: FPU usable */
	fnstsw	%ax			/* result = status word */
	fnclex				/* clear the x87 exceptions */
	ret
	SET_SIZE(fperr_reset)

	ENTRY_NP(fpxerr_reset)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* stack temporary for MXCSR */
	clts				/* CR0_TS off: FPU usable */
	stmxcsr	(%rsp)
	movl	(%rsp), %eax		/* result = MXCSR before clearing */
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
	ldmxcsr	(%rsp)			/* exception flags now clear */
	movq	%rbp, %rsp
	popq	%rbp
	ret
	SET_SIZE(fpxerr_reset)

#elif defined(__i386)

	ENTRY_NP(fperr_reset)
	xorl	%eax, %eax		/* upper bits of the result are 0 */
	clts				/* CR0_TS off: FPU usable */
	fnstsw	%ax			/* result = status word */
	fnclex				/* clear the x87 exceptions */
	ret
	SET_SIZE(fperr_reset)

	ENTRY_NP(fpxerr_reset)
	clts				/* CR0_TS off: FPU usable */
	subl	$4, %esp		/* stack temporary for MXCSR */
	stmxcsr	(%esp)
	movl	(%esp), %eax		/* result = MXCSR before clearing */
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%esp)
	ldmxcsr	(%esp)			/* exception flags now clear */
	addl	$4, %esp
	ret
	SET_SIZE(fpxerr_reset)

#endif	/* __i386 */
#endif	/* __lint */
/*
 * fpgetcwsw()
 *
 * Return the x87 control and status words packed into one 32-bit
 * value: the status word is stored at offset 0 of a stack temporary
 * and the control word at offset 2, so the status word ends up in the
 * low 16 bits of the result and the control word in the high 16 bits.
 * CR0_TS is cleared first and left clear.
 */
#if defined(__lint)

uint32_t
fpgetcwsw(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpgetcwsw)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* stack temporary */
	clts				/* CR0_TS off: FPU usable */
	fnstsw	(%rsp)			/* bytes 0-1: status word */
	fnstcw	2(%rsp)			/* bytes 2-3: control word */
	movl	(%rsp), %eax		/* result = both words */
	movq	%rbp, %rsp
	popq	%rbp
	ret
	SET_SIZE(fpgetcwsw)

#elif defined(__i386)

	ENTRY_NP(fpgetcwsw)
	clts				/* CR0_TS off: FPU usable */
	subl	$4, %esp		/* stack temporary */
	fnstsw	(%esp)			/* bytes 0-1: status word */
	fnstcw	2(%esp)			/* bytes 2-3: control word */
	movl	(%esp), %eax		/* result = both words */
	addl	$4, %esp
	ret
	SET_SIZE(fpgetcwsw)

#endif	/* __i386 */
#endif	/* __lint */
/*
 * fpgetmxcsr()
 *
 * Return the current contents of the MXCSR register.  CR0_TS is
 * cleared first and left clear; the register is read through a stack
 * temporary because stmxcsr only stores to memory.
 */
#if defined(__lint)

uint32_t
fpgetmxcsr(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpgetmxcsr)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* stack temporary */
	clts				/* CR0_TS off: FPU usable */
	stmxcsr	(%rsp)
	movl	(%rsp), %eax		/* result = MXCSR */
	movq	%rbp, %rsp
	popq	%rbp
	ret
	SET_SIZE(fpgetmxcsr)

#elif defined(__i386)

	ENTRY_NP(fpgetmxcsr)
	clts				/* CR0_TS off: FPU usable */
	subl	$4, %esp		/* stack temporary */
	stmxcsr	(%esp)
	movl	(%esp), %eax		/* result = MXCSR */
	addl	$4, %esp
	ret
	SET_SIZE(fpgetmxcsr)

#endif	/* __i386 */
#endif	/* __lint */