fpu.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/param.h>
#include <sys/signal.h>
#include <sys/trap.h>
#include <sys/machtrap.h>
#include <sys/fault.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/core.h>
#include <sys/pcb.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/disp.h>
#include <sys/stack.h>
#include <sys/cmn_err.h>
#include <sys/privregs.h>
#include <sys/debug.h>
#include <sys/fpu/fpu_simulator.h>
#include <sys/fpu/globals.h>
#include <sys/fpu/fpusystm.h>
int fpdispr = 0;
/*
* For use by procfs to save the floating point context of the thread.
* Note the if (ttolwp(lwp) == curthread) in prstop, which calls
* this function, ensures that it is safe to read the fprs here.
*/
void
fp_prsave(kfpu_t *fp)
{
if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
kpreempt_disable();
if (fpu_exists) {
fp->fpu_fprs = _fp_read_fprs();
if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
_fp_write_fprs(fprs);
fp->fpu_fprs = fprs;
#ifdef DEBUG
if (fpdispr)
cmn_err(CE_NOTE,
"fp_prsave with fp disabled!");
#endif
}
fp_fksave(fp);
}
kpreempt_enable();
}
}
/*
* Copy the floating point context of the forked thread.
*/
void
fp_fork(klwp_t *lwp, klwp_t *clwp)
{
kfpu_t *cfp, *pfp;
int i;
cfp = lwptofpu(clwp);
pfp = lwptofpu(lwp);
/*
* copy the parents fpq
*/
cfp->fpu_qcnt = pfp->fpu_qcnt;
for (i = 0; i < pfp->fpu_qcnt; i++)
cfp->fpu_q[i] = pfp->fpu_q[i];
/*
* save the context of the parent into the childs fpu structure
*/
cfp->fpu_fprs = pfp->fpu_fprs;
if (ttolwp(curthread) == lwp && fpu_exists) {
fp_fksave(cfp);
} else {
for (i = 0; i < 32; i++)
cfp->fpu_fr.fpu_regs[i] = pfp->fpu_fr.fpu_regs[i];
for (i = 16; i < 32; i++)
cfp->fpu_fr.fpu_dregs[i] = pfp->fpu_fr.fpu_dregs[i];
}
cfp->fpu_en = 1;
}
/*
* Free any state associated with floating point context.
* Fp_free can be called in two cases:
* 1) from reaper -> thread_free -> lwp_freeregs -> fp_free
* fp context belongs to a thread on deathrow
* nothing to do, thread will never be resumed
* thread calling ctxfree is reaper
*
* 2) from exec -> lwp_freeregs -> fp_free
* fp context belongs to the current thread
* must disable fpu, thread calling ctxfree is curthread
*/
/*ARGSUSED1*/
void
fp_free(kfpu_t *fp, int isexec)
{
int s;
uint32_t fprs = 0;
if (curthread->t_lwp != NULL && lwptofpu(curthread->t_lwp) == fp) {
fp->fpu_en = 0;
fp->fpu_fprs = fprs;
s = splhigh();
_fp_write_fprs(fprs);
splx(s);
}
}
#ifdef SF_ERRATA_30 /* call causes fp-disabled */
extern int spitfire_call_bug;
int ill_fpcalls;
#endif
void
fp_enable(void)
{
klwp_id_t lwp;
kfpu_t *fp;
lwp = ttolwp(curthread);
ASSERT(lwp != NULL);
fp = lwptofpu(lwp);
if (fpu_exists) {
if (fp->fpu_en) {
#ifdef DEBUG
if (fpdispr)
cmn_err(CE_NOTE,
"fpu disabled, but already enabled\n");
#endif
if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
fp->fpu_fprs = FPRS_FEF;
#ifdef DEBUG
if (fpdispr)
cmn_err(CE_NOTE,
"fpu disabled, saved fprs disabled\n");
#endif
}
_fp_write_fprs(FPRS_FEF);
fp_restore(fp);
} else {
fp->fpu_en = 1;
fp->fpu_fsr = 0;
fp->fpu_fprs = FPRS_FEF;
_fp_write_fprs(FPRS_FEF);
fp_clearregs(fp);
}
} else {
int i;
if (!fp->fpu_en) {
fp->fpu_en = 1;
fp->fpu_fsr = 0;
for (i = 0; i < 32; i++)
fp->fpu_fr.fpu_regs[i] = (uint_t)-1; /* NaN */
for (i = 16; i < 32; i++) /* NaN */
fp->fpu_fr.fpu_dregs[i] = (uint64_t)-1;
}
}
}
/*
* fp_disabled normally occurs when the first floating point in a non-threaded
* program causes an fp_disabled trap. For threaded programs, the ILP32 threads
* library calls the .setpsr fasttrap, which has been modified to also set the
* appropriate bits in fpu_en and fpu_fprs, as well as to enable the %fprs,
* as before. The LP64 threads library will write to the %fprs directly,
* so fpu_en will never get updated for LP64 threaded programs,
* although fpu_fprs will, via resume.
*/
void
fp_disabled(struct regs *rp)
{
klwp_id_t lwp;
kfpu_t *fp;
int ftt;
#ifdef SF_ERRATA_30 /* call causes fp-disabled */
/*
* This code is here because sometimes the call instruction
* generates an fp_disabled trap when the call offset is large.
*/
if (spitfire_call_bug) {
uint_t instr = 0;
extern void trap(struct regs *rp, caddr_t addr, uint32_t type,
uint32_t mmu_fsr);
if (USERMODE(rp->r_tstate)) {
(void) fuword32((void *)rp->r_pc, &instr);
} else {
instr = *(uint_t *)(rp->r_pc);
}
if ((instr & 0xc0000000) == 0x40000000) {
ill_fpcalls++;
trap(rp, NULL, T_UNIMP_INSTR, 0);
return;
}
}
#endif /* SF_ERRATA_30 - call causes fp-disabled */
#ifdef CHEETAH_ERRATUM_109 /* interrupts not taken during fpops */
/*
* UltraSPARC III will report spurious fp-disabled exceptions when
* the pipe is full of fpops and an interrupt is triggered. By the
* time we get here the interrupt has been taken and we just need
* to return to where we came from and try again.
*/
if (fpu_exists && _fp_read_fprs() & FPRS_FEF)
return;
#endif /* CHEETAH_ERRATUM_109 */
lwp = ttolwp(curthread);
ASSERT(lwp != NULL);
fp = lwptofpu(lwp);
if (fpu_exists) {
kpreempt_disable();
if (fp->fpu_en) {
#ifdef DEBUG
if (fpdispr)
cmn_err(CE_NOTE,
"fpu disabled, but already enabled\n");
#endif
if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
fp->fpu_fprs = FPRS_FEF;
#ifdef DEBUG
if (fpdispr)
cmn_err(CE_NOTE,
"fpu disabled, saved fprs disabled\n");
#endif
}
_fp_write_fprs(FPRS_FEF);
fp_restore(fp);
} else {
fp->fpu_en = 1;
fp->fpu_fsr = 0;
fp->fpu_fprs = FPRS_FEF;
_fp_write_fprs(FPRS_FEF);
fp_clearregs(fp);
}
kpreempt_enable();
} else {
fp_simd_type fpsd;
int i;
(void) flush_user_windows_to_stack(NULL);
if (!fp->fpu_en) {
fp->fpu_en = 1;
fp->fpu_fsr = 0;
for (i = 0; i < 32; i++)
fp->fpu_fr.fpu_regs[i] = (uint_t)-1; /* NaN */
for (i = 16; i < 32; i++) /* NaN */
fp->fpu_fr.fpu_dregs[i] = (uint64_t)-1;
}
if (ftt = fp_emulator(&fpsd, (fp_inst_type *)rp->r_pc,
rp, (ulong_t *)rp->r_sp, fp)) {
fp->fpu_q_entrysize = sizeof (struct fpq);
fp_traps(&fpsd, ftt, rp);
}
}
}
/*
* Process the floating point queue in lwp->lwp_pcb.
*
* Each entry in the floating point queue is processed in turn.
* If processing an entry results in an exception fp_traps() is called to
* handle the exception - this usually results in the generation of a signal
* to be delivered to the user. There are 2 possible outcomes to this (note
* that hardware generated signals cannot be held!):
*
* 1. If the signal is being ignored we continue to process the rest
* of the entries in the queue.
*
* 2. If arrangements have been made for return to a user signal handler,
* sendsig() will have copied the floating point queue onto the user's
* signal stack and zero'ed the queue count in the u_pcb. Note that
* this has the side effect of terminating fp_runq's processing loop.
* We will re-run the floating point queue on return from the user
* signal handler if necessary as part of normal setcontext processing.
*/
void
fp_runq(struct regs *rp)
{
kfpu_t *fp = lwptofpu(curthread->t_lwp);
struct fq *fqp = fp->fpu_q;
fp_simd_type fpsd;
uint64_t gsr = get_gsr(fp);
/*
* don't preempt while manipulating the queue
*/
kpreempt_disable();
while (fp->fpu_qcnt) {
int fptrap;
fptrap = fpu_simulator((fp_simd_type *)&fpsd,
(fp_inst_type *)fqp->FQu.fpq.fpq_addr,
(fsr_type *)&fp->fpu_fsr, gsr,
fqp->FQu.fpq.fpq_instr);
if (fptrap) {
/*
* Instruction could not be simulated so we will
* attempt to deliver a signal.
* We may be called again upon signal exit (setcontext)
* and can continue to process the queue then.
*/
if (fqp != fp->fpu_q) {
int i;
struct fq *fqdp;
/*
* We need to normalize the floating queue so
* the excepting instruction is at the head,
* so that the queue may be copied onto the
* user signal stack by sendsig().
*/
fqdp = fp->fpu_q;
for (i = fp->fpu_qcnt; i; i--) {
*fqdp++ = *fqp++;
}
fqp = fp->fpu_q;
}
fp->fpu_q_entrysize = sizeof (struct fpq);
/*
* fpu_simulator uses the fp registers directly but it
* uses the software copy of the fsr. We need to write
* that back to fpu so that fpu's state is current for
* ucontext.
*/
if (fpu_exists)
_fp_write_pfsr(&fp->fpu_fsr);
/* post signal */
fp_traps(&fpsd, fptrap, rp);
/*
* Break from loop to allow signal to be sent.
* If there are other instructions in the fp queue
* they will be processed when/if the user retuns
* from the signal handler with a non-empty queue.
*/
break;
}
fp->fpu_qcnt--;
fqp++;
}
/*
* fpu_simulator uses the fp registers directly, so we have
* to update the pcb copies to keep current, but it uses the
* software copy of the fsr, so we write that back to fpu
*/
if (fpu_exists) {
int i;
for (i = 0; i < 32; i++)
_fp_read_pfreg(&fp->fpu_fr.fpu_regs[i], i);
for (i = 16; i < 32; i++)
_fp_read_pdreg(&fp->fpu_fr.fpu_dregs[i], i);
_fp_write_pfsr(&fp->fpu_fsr);
}
kpreempt_enable();
}
/*
* Get the precise trapped V9 floating point instruction.
* Fake up a queue to process. If getting the instruction results
* in an exception fp_traps() is called to handle the exception - this
* usually results in the generation of a signal to be delivered to the user.
*/
void
fp_precise(struct regs *rp)
{
fp_simd_type fpsd;
int inst_ftt;
union {
uint_t i;
fp_inst_type inst;
} kluge;
klwp_t *lwp = ttolwp(curthread);
kfpu_t *fp = lwptofpu(lwp);
uint64_t gsr;
int mstate;
if (fpu_exists)
save_gsr(fp);
gsr = get_gsr(fp);
/*
* Get the instruction to be emulated from the pc saved by the trap.
* Note that the kernel is NOT prepared to handle a kernel fp
* exception if it can't pass successfully through the fp simulator.
*
* If the trap occurred in user mode, set lwp_state to LWP_SYS for the
* purposes of clock accounting and switch to the LMS_TRAP microstate.
*/
if (USERMODE(rp->r_tstate)) {
inst_ftt = _fp_read_inst((uint32_t *)rp->r_pc, &kluge.i, &fpsd);
mstate = new_mstate(curthread, LMS_TRAP);
lwp->lwp_state = LWP_SYS;
} else {
kluge.i = *(uint_t *)rp->r_pc;
inst_ftt = ftt_none;
}
if (inst_ftt != ftt_none) {
/*
* Save the bad address and post the signal.
* It can only be an ftt_alignment or ftt_fault trap.
* XXX - How can this work w/mainsail and do_unaligned?
*/
fpsd.fp_trapaddr = (caddr_t)rp->r_pc;
fp_traps(&fpsd, inst_ftt, rp);
} else {
/*
* Conjure up a floating point queue and advance the pc/npc
* to fake a deferred fp trap. We now run the fp simulator
* in fp_precise, while allowing setfpregs to call fp_runq,
* because this allows us to do the ugly machinations to
* inc/dec the pc depending on the trap type, as per
* bugid 1210159. fp_runq is still going to have the
* generic "how do I connect the "fp queue to the pc/npc"
* problem alluded to in bugid 1192883, which is only a
* problem for a restorecontext of a v8 fp queue on a
* v9 system, which seems like the .000000001% case (on v9)!
*/
struct fpq *pfpq = &fp->fpu_q->FQu.fpq;
fp_simd_type fpsd;
int fptrap;
pfpq->fpq_addr = (uint_t *)rp->r_pc;
pfpq->fpq_instr = kluge.i;
fp->fpu_qcnt = 1;
fp->fpu_q_entrysize = sizeof (struct fpq);
kpreempt_disable();
(void) flush_user_windows_to_stack(NULL);
fptrap = fpu_vis_sim((fp_simd_type *)&fpsd,
(fp_inst_type *)pfpq->fpq_addr, rp,
(fsr_type *)&fp->fpu_fsr, gsr, kluge.i);
/* update the hardware fp fsr state for sake of ucontext */
if (fpu_exists)
_fp_write_pfsr(&fp->fpu_fsr);
if (fptrap) {
/* back up the pc if the signal needs to be precise */
if (fptrap != ftt_ieee) {
fp->fpu_qcnt = 0;
}
/* post signal */
fp_traps(&fpsd, fptrap, rp);
/* decrement queue count for ieee exceptions */
if (fptrap == ftt_ieee) {
fp->fpu_qcnt = 0;
}
} else {
fp->fpu_qcnt = 0;
}
/* update the software pcb copies of hardware fp registers */
if (fpu_exists) {
fp_save(fp);
}
kpreempt_enable();
}
/*
* Reset lwp_state to LWP_USER for the purposes of clock accounting,
* and restore the previously saved microstate.
*/
if (USERMODE(rp->r_tstate)) {
(void) new_mstate(curthread, mstate);
lwp->lwp_state = LWP_USER;
}
}
/*
* Handle floating point traps generated by simulation/emulation.
*/
void
fp_traps(
fp_simd_type *pfpsd, /* Pointer to simulator data */
enum ftt_type ftt, /* trap type */
struct regs *rp) /* ptr to regs fro trap */
{
/*
* If we take a user's exception in kernel mode, we want to trap
* with the user's registers.
*/
switch (ftt) {
case ftt_ieee:
fpu_trap(rp, pfpsd->fp_trapaddr, T_FP_EXCEPTION_IEEE,
pfpsd->fp_trapcode);
break;
case ftt_fault:
fpu_trap(rp, pfpsd->fp_trapaddr, T_DATA_EXCEPTION, 0);
break;
case ftt_alignment:
fpu_trap(rp, pfpsd->fp_trapaddr, T_ALIGNMENT, 0);
break;
case ftt_unimplemented:
fpu_trap(rp, pfpsd->fp_trapaddr, T_UNIMP_INSTR, 0);
break;
default:
/*
* We don't expect any of the other types here.
*/
cmn_err(CE_PANIC, "fp_traps: bad ftt");
}
}