/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* common code with bug fixes from original version in trap.c */
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/fpu/fpusystm.h>
#include <sys/fpu/fpu_simulator.h>
#include <sys/inline.h>
#include <sys/debug.h>
#include <sys/privregs.h>
#include <sys/machpcb.h>
#include <sys/simulate.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/stack.h>
#include <sys/watchpoint.h>
#include <sys/trap.h>
#include <sys/machtrap.h>
#include <sys/mman.h>
#include <sys/asi.h>
#include <sys/copyops.h>
#include <vm/as.h>
#include <vm/page.h>
#include <sys/model.h>
#include <vm/seg_vn.h>
#include <sys/byteorder.h>
#include <sys/time.h>
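/*
 * Instruction decode helpers: IS_IBIT_SET tests the immediate (i) bit,
 * IS_VIS1 matches the impdep1 (VIS) opcode group, IS_FLOAT_QUAD_OP matches
 * the FPop1/FPop2 groups (which include the unimplemented quad ops), and
 * IS_PARTIAL_OR_SHORT_FLOAT_LD_ST matches lddfa/stdfa with an extended
 * (greater than ASI_SNFL) address space identifier.
 */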
#define IS_IBIT_SET(x) (x & 0x2000)
#define IS_VIS1(op, op3)(op == 2 && op3 == 0x36)
#define IS_FLOAT_QUAD_OP(op, op3)(op == 2 && (op3 == 0x34 || \
op3 == 0x35))
#define IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(op, op3, asi) \
(op == 3 && (op3 == IOP_V8_LDDFA || \
op3 == IOP_V8_STDFA) && asi > ASI_SNFL)
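/* set non-zero to trace unaligned-access fixups via printf */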
static int aligndebug = 0;
/*
* For the sake of those who must be compatible with unaligned
* architectures, users can link their programs to use a
* corrective trap handler that will fix unaligned references;
* a special trap #6 (T_FIX_ALIGN) enables this 'feature'.
* Returns 1 for success, 0 for failure.
*/
int
do_unaligned(struct regs *rp, caddr_t *badaddr)
{
uint_t inst, op3, asi = 0;
uint_t rd, rs1, rs2;
int sz, nf = 0, ltlend = 0;
int floatflg;
int fsrflg;
int immflg;
int lddstdflg;
caddr_t addr;
uint64_t val;
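/*
 * Staging buffer for the simulated data.  The overlaid arrays let the
 * code below address the same 16 bytes as bytes, halfwords, words or
 * doublewords; on this big-endian machine the low-order byte of a
 * one-byte access lives at c[7], a halfword at s[3], a word at i[1].
 */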
union {
uint64_t l[2];
uint32_t i[4];
uint16_t s[8];
uint8_t c[16];
} data;
ASSERT(USERMODE(rp->r_tstate));
inst = fetch_user_instr((caddr_t)rp->r_pc);
op3 = (inst >> 19) & 0x3f;
rd = (inst >> 25) & 0x1f;
rs1 = (inst >> 14) & 0x1f;
rs2 = inst & 0x1f;
floatflg = (inst >> 24) & 1;
immflg = (inst >> 13) & 1;
lddstdflg = fsrflg = 0;
/* if not load or store do nothing */
if ((inst >> 30) != 3)
return (0);
/* if ldstub or swap, do nothing */
if ((inst & 0xc1680000) == 0xc0680000)
return (0);
/* if cas/casx, do nothing */
if ((inst & 0xc1e00000) == 0xc1e00000)
return (0);
if (floatflg) {
switch ((inst >> 19) & 3) { /* map size bits to a number */
case 0: sz = 4;
break; /* ldf{a}/stf{a} */
case 1: fsrflg = 1;
if (rd == 0)
sz = 4; /* ldfsr/stfsr */
else if (rd == 1)
sz = 8; /* ldxfsr/stxfsr */
else
return (SIMU_ILLEGAL);
break;
case 2: sz = 16;
break; /* ldqf{a}/stqf{a} */
case 3: sz = 8;
break; /* lddf{a}/stdf{a} */
}
/*
* Fix to access extra double register encoding plus
* compensate to access the correct fpu_dreg.
*/
if ((sz > 4) && (fsrflg == 0)) {
if ((rd & 1) == 1)
rd = (rd & 0x1e) | 0x20;
rd = rd >> 1;
if ((sz == 16) && ((rd & 0x1) != 0))
return (SIMU_ILLEGAL);
}
} else {
int sz_bits = (inst >> 19) & 0xf;
switch (sz_bits) { /* map size bits to a number */
case 0: /* lduw{a} */
case 4: /* stw{a} */
case 8: /* ldsw{a} */
case 0xf: /* swap */
sz = 4; break;
case 1: /* ldub{a} */
case 5: /* stb{a} */
case 9: /* ldsb{a} */
case 0xd: /* ldstub */
sz = 1; break;
case 2: /* lduh{a} */
case 6: /* sth{a} */
case 0xa: /* ldsh{a} */
sz = 2; break;
case 3: /* ldd{a} */
case 7: /* std{a} */
lddstdflg = 1;
sz = 8; break;
case 0xb: /* ldx{a} */
case 0xe: /* stx{a} */
sz = 8; break;
}
}
/* only support primary and secondary asi's */
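/* bit 4 of op3 marks the alternate-space (ld*a/st*a) forms */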
if ((op3 >> 4) & 1) {
if (immflg) {
asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
TSTATE_ASI_MASK;
} else {
asi = (inst >> 5) & 0xff;
}
switch (asi) {
case ASI_P:
case ASI_S:
break;
case ASI_PNF:
case ASI_SNF:
nf = 1;
break;
case ASI_PL:
case ASI_SL:
ltlend = 1;
break;
case ASI_PNFL:
case ASI_SNFL:
ltlend = 1;
nf = 1;
break;
default:
return (0);
}
/*
* Non-faulting stores generate a data_access_exception trap,
* according to the Spitfire manual, which should be signaled
* as an illegal instruction trap, because it can't be fixed.
*/
if ((nf) && ((op3 == IOP_V8_STQFA) || (op3 == IOP_V8_STDFA)))
return (SIMU_ILLEGAL);
}
if (aligndebug) {
printf("unaligned access at %p, instruction: 0x%x\n",
(void *)rp->r_pc, inst);
printf("type %s", (((inst >> 21) & 1) ? "st" : "ld"));
if (((inst >> 21) & 1) == 0)
printf(" %s", (((inst >> 22) & 1) ?
"signed" : "unsigned"));
printf(" asi 0x%x size %d immflg %d\n", asi, sz, immflg);
printf("rd = %d, op3 = 0x%x, rs1 = %d, rs2 = %d, imm13=0x%x\n",
rd, op3, rs1, rs2, (inst & 0x1fff));
}
(void) flush_user_windows_to_stack(NULL);
if (getreg(rp, rs1, &val, badaddr))
return (SIMU_FAULT);
addr = (caddr_t)val; /* convert to 32/64 bit address */
if (aligndebug)
printf("addr 1 = %p\n", (void *)addr);
/* check immediate bit and use immediate field or reg (rs2) */
if (immflg) {
int imm;
imm = inst & 0x1fff; /* mask out immediate field */
imm <<= 19; /* sign extend it */
imm >>= 19;
addr += imm; /* compute address */
} else {
if (getreg(rp, rs2, &val, badaddr))
return (SIMU_FAULT);
addr += val;
}
/*
* If this is a 32-bit program, chop the address accordingly. The
* intermediate uintptr_t casts prevent warnings under a certain
* compiler, and the temporary 32 bit storage is intended to force
* proper code generation and break up what would otherwise be a
* quadruple cast.
*/
if (curproc->p_model == DATAMODEL_ILP32) {
caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
addr = (caddr_t)(uintptr_t)addr32;
}
if (aligndebug)
printf("addr 2 = %p\n", (void *)addr);
if (addr >= curproc->p_as->a_userlimit) {
*badaddr = addr;
goto badret;
}
/* a single bit differentiates ld and st */
if ((inst >> 21) & 1) { /* store */
if (floatflg) {
klwp_id_t lwp = ttolwp(curthread);
kfpu_t *fp = lwptofpu(lwp);
/* Ensure fp has been enabled */
if (fpu_exists) {
if (!(_fp_read_fprs() & FPRS_FEF))
fp_enable();
} else {
if (!fp->fpu_en)
fp_enable();
}
/* if fpu_exists read fpu reg */
if (fpu_exists) {
if (fsrflg) {
_fp_read_pfsr(&data.l[0]);
} else {
if (sz == 4) {
data.i[0] = 0;
_fp_read_pfreg(
(unsigned *)&data.i[1], rd);
}
if (sz >= 8)
_fp_read_pdreg(
&data.l[0], rd);
if (sz == 16)
_fp_read_pdreg(
&data.l[1], rd+1);
}
} else {
if (fsrflg) {
/* Clear reserved bits, set version=7 */
fp->fpu_fsr &= ~0x30301000;
fp->fpu_fsr |= 0xE0000;
data.l[0] = fp->fpu_fsr;
} else {
if (sz == 4) {
data.i[0] = 0;
data.i[1] =
(unsigned)fp->
fpu_fr.fpu_regs[rd];
}
if (sz >= 8)
data.l[0] =
fp->fpu_fr.fpu_dregs[rd];
if (sz == 16)
data.l[1] =
fp->fpu_fr.fpu_dregs[rd+1];
}
}
} else {
if (lddstdflg) { /* combine the data */
if (getreg(rp, rd, &data.l[0], badaddr))
return (SIMU_FAULT);
if (getreg(rp, rd+1, &data.l[1], badaddr))
return (SIMU_FAULT);
if (ltlend) {
/*
* For STD, each 32-bit word is byte-
* swapped individually. For
* simplicity we don't want to do that
* below, so we swap the words now to
* get the desired result in the end.
*/
data.i[0] = data.i[3];
} else {
data.i[0] = data.i[1];
data.i[1] = data.i[3];
}
} else {
if (getreg(rp, rd, &data.l[0], badaddr))
return (SIMU_FAULT);
}
}
if (aligndebug) {
if (sz == 16) {
printf("data %x %x %x %x\n",
data.i[0], data.i[1], data.i[2], data.i[3]);
} else {
printf("data %x %x %x %x %x %x %x %x\n",
data.c[0], data.c[1], data.c[2], data.c[3],
data.c[4], data.c[5], data.c[6], data.c[7]);
}
}
if (ltlend) {
if (sz == 1) {
if (xcopyout_little(&data.c[7], addr,
(size_t)sz) != 0)
goto badret;
} else if (sz == 2) {
if (xcopyout_little(&data.s[3], addr,
(size_t)sz) != 0)
goto badret;
} else if (sz == 4) {
if (xcopyout_little(&data.i[1], addr,
(size_t)sz) != 0)
goto badret;
} else {
if (xcopyout_little(&data.l[0], addr,
(size_t)sz) != 0)
goto badret;
}
} else {
if (sz == 1) {
if (copyout(&data.c[7], addr, (size_t)sz) == -1)
goto badret;
} else if (sz == 2) {
if (copyout(&data.s[3], addr, (size_t)sz) == -1)
goto badret;
} else if (sz == 4) {
if (copyout(&data.i[1], addr, (size_t)sz) == -1)
goto badret;
} else {
if (copyout(&data.l[0], addr, (size_t)sz) == -1)
goto badret;
}
}
} else { /* load */
if (sz == 1) {
if (ltlend) {
if (xcopyin_little(addr, &data.c[7],
(size_t)sz) != 0) {
if (nf)
data.c[7] = 0;
else
goto badret;
}
} else {
if (copyin(addr, &data.c[7],
(size_t)sz) == -1) {
if (nf)
data.c[7] = 0;
else
goto badret;
}
}
/* if signed and the sign bit is set extend it */
if (((inst >> 22) & 1) && ((data.c[7] >> 7) & 1)) {
data.i[0] = (uint_t)-1; /* extend sign bit */
data.s[2] = (ushort_t)-1;
data.c[6] = (uchar_t)-1;
} else {
data.i[0] = 0; /* clear upper 32+24 bits */
data.s[2] = 0;
data.c[6] = 0;
}
} else if (sz == 2) {
if (ltlend) {
if (xcopyin_little(addr, &data.s[3],
(size_t)sz) != 0) {
if (nf)
data.s[3] = 0;
else
goto badret;
}
} else {
if (copyin(addr, &data.s[3],
(size_t)sz) == -1) {
if (nf)
data.s[3] = 0;
else
goto badret;
}
}
/* if signed and the sign bit is set extend it */
if (((inst >> 22) & 1) && ((data.s[3] >> 15) & 1)) {
data.i[0] = (uint_t)-1; /* extend sign bit */
data.s[2] = (ushort_t)-1;
} else {
data.i[0] = 0; /* clear upper 32+16 bits */
data.s[2] = 0;
}
} else if (sz == 4) {
if (ltlend) {
if (xcopyin_little(addr, &data.i[1],
(size_t)sz) != 0) {
if (!nf)
goto badret;
data.i[1] = 0;
}
} else {
if (copyin(addr, &data.i[1],
(size_t)sz) == -1) {
if (!nf)
goto badret;
data.i[1] = 0;
}
}
/* if signed and the sign bit is set extend it */
if (((inst >> 22) & 1) && ((data.i[1] >> 31) & 1)) {
data.i[0] = (uint_t)-1; /* extend sign bit */
} else {
data.i[0] = 0; /* clear upper 32 bits */
}
} else {
if (ltlend) {
if (xcopyin_little(addr, &data.l[0],
(size_t)sz) != 0) {
if (!nf)
goto badret;
data.l[0] = 0;
}
} else {
if (copyin(addr, &data.l[0],
(size_t)sz) == -1) {
if (!nf)
goto badret;
data.l[0] = 0;
}
}
}
if (aligndebug) {
if (sz == 16) {
printf("data %x %x %x %x\n",
data.i[0], data.i[1], data.i[2], data.i[3]);
} else {
printf("data %x %x %x %x %x %x %x %x\n",
data.c[0], data.c[1], data.c[2], data.c[3],
data.c[4], data.c[5], data.c[6], data.c[7]);
}
}
if (floatflg) { /* if fpu_exists write fpu reg */
klwp_id_t lwp = ttolwp(curthread);
kfpu_t *fp = lwptofpu(lwp);
/* Ensure fp has been enabled */
if (fpu_exists) {
if (!(_fp_read_fprs() & FPRS_FEF))
fp_enable();
} else {
if (!fp->fpu_en)
fp_enable();
}
/* if fpu_exists read fpu reg */
if (fpu_exists) {
if (fsrflg) {
_fp_write_pfsr(&data.l[0]);
} else {
if (sz == 4)
_fp_write_pfreg(
(unsigned *)&data.i[1], rd);
if (sz >= 8)
_fp_write_pdreg(
&data.l[0], rd);
if (sz == 16)
_fp_write_pdreg(
&data.l[1], rd+1);
}
} else {
if (fsrflg) {
fp->fpu_fsr = data.l[0];
} else {
if (sz == 4)
fp->fpu_fr.fpu_regs[rd] =
(unsigned)data.i[1];
if (sz >= 8)
fp->fpu_fr.fpu_dregs[rd] =
data.l[0];
if (sz == 16)
fp->fpu_fr.fpu_dregs[rd+1] =
data.l[1];
}
}
} else {
if (lddstdflg) { /* split the data */
if (ltlend) {
/*
* For LDD, each 32-bit word is byte-
* swapped individually. We didn't
* do that above, but this will give
* us the desired result.
*/
data.i[3] = data.i[0];
} else {
data.i[3] = data.i[1];
data.i[1] = data.i[0];
}
data.i[0] = 0;
data.i[2] = 0;
if (putreg(&data.l[0], rp, rd, badaddr) == -1)
goto badret;
if (putreg(&data.l[1], rp, rd+1, badaddr) == -1)
goto badret;
} else {
if (putreg(&data.l[0], rp, rd, badaddr) == -1)
goto badret;
}
}
}
return (SIMU_SUCCESS);
badret:
return (SIMU_FAULT);
}
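/*
 * Simulate an unimplemented ldd/ldda or std/stda (T_UNIMP_LDD/T_UNIMP_STD)
 * by performing a single 64-bit access and splitting or combining the
 * even/odd register pair.
 */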
int
simulate_lddstd(struct regs *rp, caddr_t *badaddr)
{
uint_t inst, op3, asi = 0;
uint_t rd, rs1, rs2;
int nf = 0, ltlend = 0, usermode;
int immflg;
uint64_t reven;
uint64_t rodd;
caddr_t addr;
uint64_t val;
uint64_t data;
usermode = USERMODE(rp->r_tstate);
if (usermode)
inst = fetch_user_instr((caddr_t)rp->r_pc);
else
inst = *(uint_t *)rp->r_pc;
op3 = (inst >> 19) & 0x3f;
rd = (inst >> 25) & 0x1f;
rs1 = (inst >> 14) & 0x1f;
rs2 = inst & 0x1f;
immflg = (inst >> 13) & 1;
if (USERMODE(rp->r_tstate))
(void) flush_user_windows_to_stack(NULL);
else
flush_windows();
if ((op3 >> 4) & 1) { /* is this LDDA/STDA? */
if (immflg) {
asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
TSTATE_ASI_MASK;
} else {
asi = (inst >> 5) & 0xff;
}
switch (asi) {
case ASI_P:
case ASI_S:
break;
case ASI_PNF:
case ASI_SNF:
nf = 1;
break;
case ASI_PL:
case ASI_SL:
ltlend = 1;
break;
case ASI_PNFL:
case ASI_SNFL:
ltlend = 1;
nf = 1;
break;
case ASI_AIUP:
case ASI_AIUS:
usermode = 1;
break;
case ASI_AIUPL:
case ASI_AIUSL:
usermode = 1;
ltlend = 1;
break;
default:
return (SIMU_ILLEGAL);
}
}
if (getreg(rp, rs1, &val, badaddr))
return (SIMU_FAULT);
addr = (caddr_t)val; /* convert to 32/64 bit address */
/* check immediate bit and use immediate field or reg (rs2) */
if (immflg) {
int imm;
imm = inst & 0x1fff; /* mask out immediate field */
imm <<= 19; /* sign extend it */
imm >>= 19;
addr += imm; /* compute address */
} else {
if (getreg(rp, rs2, &val, badaddr))
return (SIMU_FAULT);
addr += val;
}
/*
* T_UNIMP_LDD and T_UNIMP_STD are higher priority than
* T_ALIGNMENT. So we have to make sure that the address is
* kosher before trying to use it, because the hardware hasn't
* checked it for us yet.
*/
if (((uintptr_t)addr & 0x7) != 0) {
if (curproc->p_fixalignment)
return (do_unaligned(rp, badaddr));
else
return (SIMU_UNALIGN);
}
/*
* If this is a 32-bit program, chop the address accordingly. The
* intermediate uintptr_t casts prevent warnings under a certain
* compiler, and the temporary 32 bit storage is intended to force
* proper code generation and break up what would otherwise be a
* quadruple cast.
*/
if (curproc->p_model == DATAMODEL_ILP32 && usermode) {
caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
addr = (caddr_t)(uintptr_t)addr32;
}
if ((inst >> 21) & 1) { /* store */
if (getreg(rp, rd, &reven, badaddr))
return (SIMU_FAULT);
if (getreg(rp, rd+1, &rodd, badaddr))
return (SIMU_FAULT);
if (ltlend) {
reven = BSWAP_32(reven);
rodd = BSWAP_32(rodd);
}
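/* the even register (rd) supplies the upper word, rd+1 the lower */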
data = (reven << 32) | rodd;
if (usermode) {
if (suword64_nowatch(addr, data) == -1)
return (SIMU_FAULT);
} else {
*(uint64_t *)addr = data;
}
} else { /* load */
if (usermode) {
if (fuword64_nowatch(addr, &data)) {
if (nf)
data = 0;
else
return (SIMU_FAULT);
}
} else
data = *(uint64_t *)addr;
reven = (data >> 32);
rodd = (uint64_t)(uint32_t)data;
if (ltlend) {
reven = BSWAP_32(reven);
rodd = BSWAP_32(rodd);
}
if (putreg(&reven, rp, rd, badaddr) == -1)
return (SIMU_FAULT);
if (putreg(&rodd, rp, rd+1, badaddr) == -1)
return (SIMU_FAULT);
}
return (SIMU_SUCCESS);
}
/*
* simulate popc
*/
static int
simulate_popc(struct regs *rp, caddr_t *badaddr, uint_t inst)
{
uint_t rd, rs2, rs1;
uint_t immflg;
uint64_t val, cnt = 0;
rd = (inst >> 25) & 0x1f;
rs1 = (inst >> 14) & 0x1f;
rs2 = inst & 0x1f;
immflg = (inst >> 13) & 1;
if (rs1 > 0)
return (SIMU_ILLEGAL);
(void) flush_user_windows_to_stack(NULL);
/* check immediate bit and use immediate field or reg (rs2) */
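/* count set bits by repeatedly clearing the lowest one (x &= x - 1) */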
if (immflg) {
int64_t imm;
imm = inst & 0x1fff; /* mask out immediate field */
imm <<= 51; /* sign extend it */
imm >>= 51;
if (imm != 0) {
for (cnt = 0; imm != 0; imm &= imm-1)
cnt++;
}
} else {
if (getreg(rp, rs2, &val, badaddr))
return (SIMU_FAULT);
if (val != 0) {
for (cnt = 0; val != 0; val &= val-1)
cnt++;
}
}
if (putreg(&cnt, rp, rd, badaddr) == -1)
return (SIMU_FAULT);
return (SIMU_SUCCESS);
}
/*
* simulate mulscc
*/
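/*
 * One V8 multiply step: form (icc.n ^ icc.v) in bit 31 above rs1 >> 1,
 * add rs2 or simm13 if the lsb of %y is set (else add zero), set the
 * condition codes from the 32-bit add, write the result to rd, and
 * shift %y right one bit with the old lsb of rs1 entering its msb.
 */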
static int
simulate_mulscc(struct regs *rp, caddr_t *badaddr, uint_t inst)
{
uint32_t s1, s2;
uint32_t c, d, v;
uint_t rd, rs1;
int64_t d64;
uint64_t ud64;
uint64_t drs1;
(void) flush_user_windows_to_stack(NULL);
if ((inst >> 13) & 1) { /* immediate */
d64 = inst & 0x1fff;
d64 <<= 51; /* sign extend it */
d64 >>= 51;
} else {
uint_t rs2;
uint64_t drs2;
if (inst & 0x1fe0) {
return (SIMU_ILLEGAL);
}
rs2 = inst & 0x1f;
if (getreg(rp, rs2, &drs2, badaddr)) {
return (SIMU_FAULT);
}
d64 = (int64_t)drs2;
}
rs1 = (inst >> 14) & 0x1f;
if (getreg(rp, rs1, &drs1, badaddr)) {
return (SIMU_FAULT);
}
/* icc.n xor icc.v */
s1 = ((rp->r_tstate & TSTATE_IN) >> (TSTATE_CCR_SHIFT + 3)) ^
((rp->r_tstate & TSTATE_IV) >> (TSTATE_CCR_SHIFT + 1));
s1 = (s1 << 31) | (((uint32_t)drs1) >> 1);
if (rp->r_y & 1) {
s2 = (uint32_t)d64;
} else {
s2 = 0;
}
d = s1 + s2;
ud64 = (uint64_t)d;
/* set the icc flags */
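/* v = signed overflow of s1 + s2, c = carry out of bit 31 */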
v = (s1 & s2 & ~d) | (~s1 & ~s2 & d);
c = (s1 & s2) | (~d & (s1 | s2));
rp->r_tstate &= ~TSTATE_ICC;
rp->r_tstate |= (uint64_t)((c >> 31) & 1) << (TSTATE_CCR_SHIFT + 0);
rp->r_tstate |= (uint64_t)((v >> 31) & 1) << (TSTATE_CCR_SHIFT + 1);
rp->r_tstate |= (uint64_t)(d ? 0 : 1) << (TSTATE_CCR_SHIFT + 2);
rp->r_tstate |= (uint64_t)((d >> 31) & 1) << (TSTATE_CCR_SHIFT + 3);
if (rp->r_tstate & TSTATE_IC) {
ud64 |= (1ULL << 32);
}
/* set the xcc flags */
rp->r_tstate &= ~TSTATE_XCC;
if (ud64 == 0) {
rp->r_tstate |= TSTATE_XZ;
}
rd = (inst >> 25) & 0x1f;
if (putreg(&ud64, rp, rd, badaddr)) {
return (SIMU_FAULT);
}
d64 = (drs1 << 32) | (uint32_t)rp->r_y;
d64 >>= 1;
rp->r_y = (uint32_t)d64;
return (SIMU_SUCCESS);
}
/*
* simulate unimplemented instructions (popc, ldqf{a}, stqf{a})
*/
int
simulate_unimp(struct regs *rp, caddr_t *badaddr)
{
uint_t inst, optype, op3, asi;
uint_t rs1, rd;
uint_t ignor, i;
machpcb_t *mpcb = lwptompcb(ttolwp(curthread));
int nomatch = 0;
caddr_t addr = (caddr_t)rp->r_pc;
struct as *as;
caddr_t ka;
pfn_t pfnum;
page_t *pp;
proc_t *p = ttoproc(curthread);
struct seg *mapseg;
struct segvn_data *svd;
ASSERT(USERMODE(rp->r_tstate));
inst = fetch_user_instr(addr);
if (inst == (uint_t)-1) {
mpcb->mpcb_illexcaddr = addr;
mpcb->mpcb_illexcinsn = (uint32_t)-1;
return (SIMU_ILLEGAL);
}
/*
* When fixing dirty v8 instructions there's a race if two processors
* are executing the dirty executable at the same time. If one
* cleans the instruction as the other is executing it the second
* processor will see a clean instruction when it comes through this
* code and will return SIMU_ILLEGAL. To work around the race
* this code will keep track of the last illegal instruction seen
* by each lwp and will only take action if the illegal instruction
* is repeatable.
*/
if (addr != mpcb->mpcb_illexcaddr ||
inst != mpcb->mpcb_illexcinsn)
nomatch = 1;
mpcb->mpcb_illexcaddr = addr;
mpcb->mpcb_illexcinsn = inst;
/* instruction fields */
i = (inst >> 13) & 0x1;
rd = (inst >> 25) & 0x1f;
optype = (inst >> 30) & 0x3;
op3 = (inst >> 19) & 0x3f;
ignor = (inst >> 5) & 0xff;
if (IS_IBIT_SET(inst)) {
asi = (uint32_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
TSTATE_ASI_MASK);
} else {
asi = ignor;
}
if (IS_VIS1(optype, op3) ||
IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(optype, op3, asi) ||
IS_FLOAT_QUAD_OP(optype, op3)) {
klwp_t *lwp = ttolwp(curthread);
kfpu_t *fp = lwptofpu(lwp);
if (fpu_exists) {
if (!(_fp_read_fprs() & FPRS_FEF))
fp_enable();
_fp_read_pfsr(&fp->fpu_fsr);
} else {
if (!fp->fpu_en)
fp_enable();
}
fp_precise(rp);
return (SIMU_RETRY);
}
if (optype == 2 && op3 == IOP_V8_POPC) {
return (simulate_popc(rp, badaddr, inst));
} else if (optype == 3 && op3 == IOP_V8_POPC) {
return (SIMU_ILLEGAL);
} else if (optype == OP_V8_ARITH && op3 == IOP_V8_MULScc) {
return (simulate_mulscc(rp, badaddr, inst));
}
if (optype == OP_V8_LDSTR) {
if (op3 == IOP_V8_LDQF || op3 == IOP_V8_LDQFA ||
op3 == IOP_V8_STQF || op3 == IOP_V8_STQFA)
return (do_unaligned(rp, badaddr));
}
/* This is a new instruction, so reset illexccnt as well. */
if (nomatch) {
mpcb->mpcb_illexccnt = 0;
return (SIMU_RETRY);
}
/*
* In order to keep us from entering into an infinite loop while
* attempting to clean up faulty instructions, we will return
* SIMU_ILLEGAL once we've cleaned up the instruction as much
* as we can, and still end up here.
*/
if (mpcb->mpcb_illexccnt >= 3)
return (SIMU_ILLEGAL);
mpcb->mpcb_illexccnt += 1;
/*
* The rest of the code handles v8 binaries with instructions
* that have dirty (non-zero) bits in reserved or 'ignored'
* fields; these will cause core dumps on v9 machines.
*
* We only clean dirty instructions in 32-bit programs (i.e., v8)
* running on SPARCv9 processors. True v9 programs are forced
* to use the instruction set as intended.
*/
if (lwp_getdatamodel(curthread->t_lwp) != DATAMODEL_ILP32)
return (SIMU_ILLEGAL);
switch (optype) {
case OP_V8_BRANCH:
case OP_V8_CALL:
return (SIMU_ILLEGAL); /* these don't have ignored fields */
/*NOTREACHED*/
case OP_V8_ARITH:
switch (op3) {
case IOP_V8_RETT:
if (rd == 0 && !(i == 0 && ignor))
return (SIMU_ILLEGAL);
if (rd)
inst &= ~(0x1f << 25);
if (i == 0 && ignor)
inst &= ~(0xff << 5);
break;
case IOP_V8_TCC:
if (i == 0 && ignor != 0) {
inst &= ~(0xff << 5);
} else if (i == 1 && (((inst >> 7) & 0x3f) != 0)) {
inst &= ~(0x3f << 7);
} else {
return (SIMU_ILLEGAL);
}
break;
case IOP_V8_JMPL:
case IOP_V8_RESTORE:
case IOP_V8_SAVE:
if ((op3 == IOP_V8_RETT && rd) ||
(i == 0 && ignor)) {
inst &= ~(0xff << 5);
} else {
return (SIMU_ILLEGAL);
}
break;
case IOP_V8_FCMP:
if (rd == 0)
return (SIMU_ILLEGAL);
inst &= ~(0x1f << 25);
break;
case IOP_V8_RDASR:
rs1 = ((inst >> 14) & 0x1f);
if (rs1 == 1 || (rs1 >= 7 && rs1 <= 14)) {
/*
* The instruction specifies an invalid
* state register - better bail out than
* "fix" it when we're not sure what was
* intended.
*/
return (SIMU_ILLEGAL);
}
/*
* Note: this case includes the 'stbar'
* instruction (rs1 == 15 && i == 0).
*/
if ((ignor = (inst & 0x3fff)) != 0)
inst &= ~(0x3fff);
break;
case IOP_V8_SRA:
case IOP_V8_SRL:
case IOP_V8_SLL:
if (ignor == 0)
return (SIMU_ILLEGAL);
inst &= ~(0xff << 5);
break;
case IOP_V8_ADD:
case IOP_V8_AND:
case IOP_V8_OR:
case IOP_V8_XOR:
case IOP_V8_SUB:
case IOP_V8_ANDN:
case IOP_V8_ORN:
case IOP_V8_XNOR:
case IOP_V8_ADDC:
case IOP_V8_UMUL:
case IOP_V8_SMUL:
case IOP_V8_SUBC:
case IOP_V8_UDIV:
case IOP_V8_SDIV:
case IOP_V8_ADDcc:
case IOP_V8_ANDcc:
case IOP_V8_ORcc:
case IOP_V8_XORcc:
case IOP_V8_SUBcc:
case IOP_V8_ANDNcc:
case IOP_V8_ORNcc:
case IOP_V8_XNORcc:
case IOP_V8_ADDCcc:
case IOP_V8_UMULcc:
case IOP_V8_SMULcc:
case IOP_V8_SUBCcc:
case IOP_V8_UDIVcc:
case IOP_V8_SDIVcc:
case IOP_V8_TADDcc:
case IOP_V8_TSUBcc:
case IOP_V8_TADDccTV:
case IOP_V8_TSUBccTV:
case IOP_V8_MULScc:
case IOP_V8_WRASR:
case IOP_V8_FLUSH:
if (i != 0 || ignor == 0)
return (SIMU_ILLEGAL);
inst &= ~(0xff << 5);
break;
default:
return (SIMU_ILLEGAL);
}
break;
case OP_V8_LDSTR:
switch (op3) {
case IOP_V8_STFSR:
case IOP_V8_LDFSR:
if (rd == 0 && !(i == 0 && ignor))
return (SIMU_ILLEGAL);
if (rd)
inst &= ~(0x1f << 25);
if (i == 0 && ignor)
inst &= ~(0xff << 5);
break;
default:
if (optype == OP_V8_LDSTR && !IS_LDST_ALT(op3) &&
i == 0 && ignor)
inst &= ~(0xff << 5);
else
return (SIMU_ILLEGAL);
break;
}
break;
default:
return (SIMU_ILLEGAL);
}
as = p->p_as;
AS_LOCK_ENTER(as, RW_READER);
mapseg = as_findseg(as, (caddr_t)rp->r_pc, 0);
ASSERT(mapseg != NULL);
svd = (struct segvn_data *)mapseg->s_data;
/*
* We only create COW page for MAP_PRIVATE mappings.
*/
SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER);
if ((svd->type & MAP_TYPE) & MAP_SHARED) {
SEGVN_LOCK_EXIT(as, &svd->lock);
AS_LOCK_EXIT(as);
return (SIMU_ILLEGAL);
}
SEGVN_LOCK_EXIT(as, &svd->lock);
AS_LOCK_EXIT(as);
/*
* A "flush" instruction using the user PC's vaddr will not work
* here, at least on Spitfire. Instead we create a temporary kernel
* mapping to the user's text page, then modify and flush that.
* Break COW by locking user page.
*/
if (as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK), PAGESIZE,
F_SOFTLOCK, S_READ))
return (SIMU_FAULT);
AS_LOCK_ENTER(as, RW_READER);
pfnum = hat_getpfnum(as->a_hat, (caddr_t)rp->r_pc);
AS_LOCK_EXIT(as);
if (pf_is_memory(pfnum)) {
pp = page_numtopp_nolock(pfnum);
ASSERT(pp == NULL || PAGE_LOCKED(pp));
} else {
(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
PAGESIZE, F_SOFTUNLOCK, S_READ);
return (SIMU_FAULT);
}
AS_LOCK_ENTER(as, RW_READER);
ka = ppmapin(pp, PROT_READ|PROT_WRITE, (caddr_t)rp->r_pc);
*(uint_t *)(ka + (uintptr_t)(rp->r_pc % PAGESIZE)) = inst;
doflush(ka + (uintptr_t)(rp->r_pc % PAGESIZE));
ppmapout(ka);
AS_LOCK_EXIT(as);
(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
PAGESIZE, F_SOFTUNLOCK, S_READ);
return (SIMU_RETRY);
}
/*
* Simulate a "rd %tick" or "rd %stick" (%asr24) instruction.
*/
int
simulate_rdtick(struct regs *rp)
{
uint_t inst, op, op3, rd, rs1, i;
caddr_t badaddr;
inst = fetch_user_instr((caddr_t)rp->r_pc);
op = (inst >> 30) & 0x3;
rd = (inst >> 25) & 0x1F;
op3 = (inst >> 19) & 0x3F;
i = (inst >> 13) & 0x1;
/*
* Make sure this is either a %tick read (rs1 == 0x4) or
* a %stick read (rs1 == 0x18) instruction.
*/
if (op == 2 && op3 == 0x28 && i == 0) {
rs1 = (inst >> 14) & 0x1F;
if (rs1 == 0x4) {
uint64_t tick;
(void) flush_user_windows_to_stack(NULL);
tick = gettick_counter();
if (putreg(&tick, rp, rd, &badaddr) == 0)
return (SIMU_SUCCESS);
} else if (rs1 == 0x18) {
uint64_t stick;
(void) flush_user_windows_to_stack(NULL);
stick = gethrtime_unscaled();
if (putreg(&stick, rp, rd, &badaddr) == 0)
return (SIMU_SUCCESS);
}
}
return (SIMU_FAULT);
}
/*
* Get the value of a register for instruction simulation
* by using the regs or window structure pointers.
* Return 0 for success, and -1 for failure. If there is a failure,
* save the faulting address using the badaddr pointer.
* We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
* Don't truncate globals/outs for 32 bit programs, for v8+ support.
*/
int
getreg(struct regs *rp, uint_t reg, uint64_t *val, caddr_t *badaddr)
{
uint64_t *rgs, *sp;
int rv = 0;
rgs = (uint64_t *)&rp->r_ps; /* globals and outs */
sp = (uint64_t *)rp->r_sp; /* ins and locals */
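/*
 * %g and %o registers (1-15) are read from the regs structure; %l and
 * %i registers (16-31) live in the register window save area on the
 * stack: 64-bit wide at sp + V9BIAS64 for a V9 stack, 32-bit wide at
 * sp for a 32-bit (v8) stack.
 */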
if (reg == 0) {
*val = 0;
} else if (reg < 16) {
*val = rgs[reg];
} else if (IS_V9STACK(sp)) {
uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
uint64_t *addr = (uint64_t *)&rw[reg - 16];
uint64_t res;
if (USERMODE(rp->r_tstate)) {
if (fuword64_nowatch(addr, &res) == -1) {
*badaddr = (caddr_t)addr;
rv = -1;
}
} else {
res = *addr;
}
*val = res;
} else {
caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
uint32_t *addr = (uint32_t *)&rw[reg - 16];
uint32_t res;
if (USERMODE(rp->r_tstate)) {
if (fuword32_nowatch(addr, &res) == -1) {
*badaddr = (caddr_t)addr;
rv = -1;
}
} else {
res = *addr;
}
*val = (uint64_t)res;
}
return (rv);
}
/*
* Set the value of a register after instruction simulation
* by using the regs or window structure pointers.
* Return 0 for success and -1 for failure.  If there is a failure,
* save the faulting address using the badaddr pointer.
* We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
* Don't truncate globals/outs for 32 bit programs, for v8+ support.
*/
int
putreg(uint64_t *data, struct regs *rp, uint_t reg, caddr_t *badaddr)
{
uint64_t *rgs, *sp;
int rv = 0;
rgs = (uint64_t *)&rp->r_ps; /* globals and outs */
sp = (uint64_t *)rp->r_sp; /* ins and locals */
if (reg == 0) {
return (0);
} else if (reg < 16) {
rgs[reg] = *data;
} else if (IS_V9STACK(sp)) {
uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
uint64_t *addr = (uint64_t *)&rw[reg - 16];
uint64_t res;
if (USERMODE(rp->r_tstate)) {
struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
res = *data;
if (suword64_nowatch(addr, res) != 0) {
*badaddr = (caddr_t)addr;
rv = -1;
}
/*
* We have changed a local or in register;
* nuke the watchpoint return windows.
*/
mpcb->mpcb_rsp[0] = NULL;
mpcb->mpcb_rsp[1] = NULL;
} else {
res = *data;
*addr = res;
}
} else {
caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
uint32_t *addr = (uint32_t *)&rw[reg - 16];
uint32_t res;
if (USERMODE(rp->r_tstate)) {
struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
res = (uint_t)*data;
if (suword32_nowatch(addr, res) != 0) {
*badaddr = (caddr_t)addr;
rv = -1;
}
/*
* We have changed a local or in register;
* nuke the watchpoint return windows.
*/
mpcb->mpcb_rsp[0] = NULL;
mpcb->mpcb_rsp[1] = NULL;
} else {
res = (uint_t)*data;
*addr = res;
}
}
return (rv);
}
/*
* Calculate a memory reference address from instruction
* operands, used to return the address of a fault, instead
* of the instruction when an error occurs. This is code that is
* common with most of the routines that simulate instructions.
*/
int
calc_memaddr(struct regs *rp, caddr_t *badaddr)
{
uint_t inst;
uint_t rd, rs1, rs2;
int sz;
int immflg;
int floatflg;
caddr_t addr;
uint64_t val;
if (USERMODE(rp->r_tstate))
inst = fetch_user_instr((caddr_t)rp->r_pc);
else
inst = *(uint_t *)rp->r_pc;
rd = (inst >> 25) & 0x1f;
rs1 = (inst >> 14) & 0x1f;
rs2 = inst & 0x1f;
floatflg = (inst >> 24) & 1;
immflg = (inst >> 13) & 1;
if (floatflg) {
switch ((inst >> 19) & 3) { /* map size bits to a number */
case 0: sz = 4; break; /* ldf/stf */
case 1: return (0); /* ld[x]fsr/st[x]fsr */
case 2: sz = 16; break; /* ldqf/stqf */
case 3: sz = 8; break; /* lddf/stdf */
}
/*
* Fix to access extra double register encoding plus
* compensate to access the correct fpu_dreg.
*/
if (sz > 4) {
if ((rd & 1) == 1)
rd = (rd & 0x1e) | 0x20;
rd = rd >> 1;
}
} else {
switch ((inst >> 19) & 0xf) { /* map size bits to a number */
case 0: /* lduw */
case 4: /* stw */
case 8: /* ldsw */
case 0xf: /* swap */
sz = 4; break;
case 1: /* ldub */
case 5: /* stb */
case 9: /* ldsb */
case 0xd: /* ldstub */
sz = 1; break;
case 2: /* lduh */
case 6: /* sth */
case 0xa: /* ldsh */
sz = 2; break;
case 3: /* ldd */
case 7: /* std */
case 0xb: /* ldx */
case 0xe: /* stx */
sz = 8; break;
}
}
if (USERMODE(rp->r_tstate))
(void) flush_user_windows_to_stack(NULL);
else
flush_windows();
if (getreg(rp, rs1, &val, badaddr))
return (SIMU_FAULT);
addr = (caddr_t)val;
/* check immediate bit and use immediate field or reg (rs2) */
if (immflg) {
int imm;
imm = inst & 0x1fff; /* mask out immediate field */
imm <<= 19; /* sign extend it */
imm >>= 19;
addr += imm; /* compute address */
} else {
if (getreg(rp, rs2, &val, badaddr))
return (SIMU_FAULT);
addr += val;
}
/*
* If this is a 32-bit program, chop the address accordingly. The
* intermediate uintptr_t casts prevent warnings under a certain
* compiler, and the temporary 32 bit storage is intended to force
* proper code generation and break up what would otherwise be a
* quadruple cast.
*/
if (curproc->p_model == DATAMODEL_ILP32 && USERMODE(rp->r_tstate)) {
caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
addr = (caddr_t)(uintptr_t)addr32;
}
*badaddr = addr;
return ((uintptr_t)addr & (sz - 1) ? SIMU_UNALIGN : SIMU_SUCCESS);
}
/*
* Return the size of a load or store instruction (1, 2, 4, 8, 16, 64).
* Also compute the precise address by instruction disassembly.
* (v9 page faults only provide the page address via the hardware.)
* Return 0 on failure (not a load or store instruction).
*/
int
instr_size(struct regs *rp, caddr_t *addrp, enum seg_rw rdwr)
{
uint_t inst, op3, asi;
uint_t rd, rs1, rs2;
int sz = 0;
int immflg;
int floatflg;
caddr_t addr;
caddr_t badaddr;
uint64_t val;
if (rdwr == S_EXEC) {
*addrp = (caddr_t)rp->r_pc;
return (4);
}
/*
* Fetch the instruction from user-level.
* We would like to assert this:
* ASSERT(USERMODE(rp->r_tstate));
* but we can't because we can reach this point from a
* register window underflow/overflow and the v9 wbuf
* traps call trap() with T_USER even though r_tstate
* indicates a system trap, not a user trap.
*/
inst = fetch_user_instr((caddr_t)rp->r_pc);
op3 = (inst >> 19) & 0x3f;
rd = (inst >> 25) & 0x1f;
rs1 = (inst >> 14) & 0x1f;
rs2 = inst & 0x1f;
floatflg = (inst >> 24) & 1;
immflg = (inst >> 13) & 1;
/* if not load or store do nothing. can't happen? */
if ((inst >> 30) != 3)
return (0);
if (immflg)
asi = (uint_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
TSTATE_ASI_MASK);
else
asi = (inst >> 5) & 0xff;
if (floatflg) {
/* check for ld/st alternate and highest defined V9 asi */
if ((op3 & 0x30) == 0x30 && asi > ASI_SNFL) {
sz = extended_asi_size(asi);
} else {
switch (op3 & 3) {
case 0:
sz = 4; /* ldf/stf/cas */
break;
case 1:
if (rd == 0)
sz = 4; /* ldfsr/stfsr */
else
sz = 8; /* ldxfsr/stxfsr */
break;
case 2:
if (op3 == 0x3e)
sz = 8; /* casx */
else
sz = 16; /* ldqf/stqf */
break;
case 3:
sz = 8; /* lddf/stdf */
break;
}
}
} else {
switch (op3 & 0xf) { /* map size bits to a number */
case 0: /* lduw */
case 4: /* stw */
case 8: /* ldsw */
case 0xf: /* swap */
sz = 4; break;
case 1: /* ldub */
case 5: /* stb */
case 9: /* ldsb */
case 0xd: /* ldstub */
sz = 1; break;
case 2: /* lduh */
case 6: /* sth */
case 0xa: /* ldsh */
sz = 2; break;
case 3: /* ldd */
case 7: /* std */
case 0xb: /* ldx */
case 0xe: /* stx */
sz = 8; break;
}
}
if (sz == 0) /* can't happen? */
return (0);
(void) flush_user_windows_to_stack(NULL);
if (getreg(rp, rs1, &val, &badaddr))
return (0);
addr = (caddr_t)val;
/* cas/casx don't use rs2 / simm13 to compute the address */
if ((op3 & 0x3d) != 0x3c) {
/* check immediate bit and use immediate field or reg (rs2) */
if (immflg) {
int imm;
imm = inst & 0x1fff; /* mask out immediate field */
imm <<= 19; /* sign extend it */
imm >>= 19;
addr += imm; /* compute address */
} else {
/*
* asi's in the 0xCx range are partial store
* instructions. For these, rs2 is a mask, not part of
* the address.
*/
if (!(floatflg && (asi & 0xf0) == 0xc0)) {
if (getreg(rp, rs2, &val, &badaddr))
return (0);
addr += val;
}
}
}
/*
* If this is a 32-bit program, chop the address accordingly. The
* intermediate uintptr_t casts prevent warnings under a certain
* compiler, and the temporary 32 bit storage is intended to force
* proper code generation and break up what would otherwise be a
* quadruple cast.
*/
if (curproc->p_model == DATAMODEL_ILP32) {
caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
addr = (caddr_t)(uintptr_t)addr32;
}
*addrp = addr;
ASSERT(sz != 0);
return (sz);
}
/*
* Fetch an instruction from user-level.
* Deal with watchpoints, if they are in effect.
*/
int32_t
fetch_user_instr(caddr_t vaddr)
{
proc_t *p = curproc;
int32_t instr;
/*
* If this is a 32-bit program, chop the address accordingly. The
* intermediate uintptr_t casts prevent warnings under a certain
* compiler, and the temporary 32 bit storage is intended to force
* proper code generation and break up what would otherwise be a
* quadruple cast.
*/
if (p->p_model == DATAMODEL_ILP32) {
caddr32_t vaddr32 = (caddr32_t)(uintptr_t)vaddr;
vaddr = (caddr_t)(uintptr_t)vaddr32;
}
if (fuword32_nowatch(vaddr, (uint32_t *)&instr) == -1)
instr = -1;
return (instr);
}