us3_common_asm.s revision 19f938d5a97467cae5809ba3cee189b02c8e03b7
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Assembly code support for Cheetah/Cheetah+ modules
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#if !defined(lint)
#include "assym.h"
#endif /* !lint */
#include <sys/asm_linkage.h>
#include <sys/mmu.h>
#include <vm/hat_sfmmu.h>
#include <sys/machparam.h>
#include <sys/machcpuvar.h>
#include <sys/machthread.h>
#include <sys/machtrap.h>
#include <sys/privregs.h>
#include <sys/trap.h>
#include <sys/cheetahregs.h>
#include <sys/us3_module.h>
#include <sys/xc_impl.h>
#include <sys/intreg.h>
#include <sys/async.h>
#include <sys/clock.h>
#include <sys/cheetahasm.h>
#include <sys/cmpregs.h>
#ifdef TRAPTRACE
#include <sys/traptrace.h>
#endif /* TRAPTRACE */
#if !defined(lint)
/* BEGIN CSTYLED */
#define DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3) \
ldxa [%g0]ASI_DCU, tmp1 ;\
btst DCU_DC, tmp1 /* is dcache enabled? */ ;\
bz,pn %icc, 1f ;\
ASM_LD(tmp1, dcache_linesize) ;\
ASM_LD(tmp2, dflush_type) ;\
cmp tmp2, FLUSHPAGE_TYPE ;\
be,pt %icc, 2f ;\
nop ;\
sllx arg1, CHEETAH_DC_VBIT_SHIFT, arg1/* tag to compare */ ;\
ASM_LD(tmp3, dcache_size) ;\
cmp tmp2, FLUSHMATCH_TYPE ;\
be,pt %icc, 3f ;\
nop ;\
/* \
* flushtype = FLUSHALL_TYPE, flush the whole thing \
* tmp3 = cache size \
* tmp1 = cache line size \
*/ \
sub tmp3, tmp1, tmp2 ;\
4: \
stxa %g0, [tmp2]ASI_DC_TAG ;\
membar #Sync ;\
cmp %g0, tmp2 ;\
bne,pt %icc, 4b ;\
sub tmp2, tmp1, tmp2 ;\
ba,pt %icc, 1f ;\
nop ;\
/* \
* flushtype = FLUSHPAGE_TYPE \
* arg1 = pfn \
* arg2 = virtual color \
* tmp1 = cache line size \
* tmp2 = tag from cache \
* tmp3 = counter \
*/ \
2: \
set MMU_PAGESIZE, tmp3 ;\
sllx arg1, MMU_PAGESHIFT, arg1 /* pfn to 43 bit PA */ ;\
sub tmp3, tmp1, tmp3 ;\
4: \
stxa %g0, [arg1 + tmp3]ASI_DC_INVAL ;\
membar #Sync ;\
5: \
cmp %g0, tmp3 ;\
bnz,pt %icc, 4b /* branch if not done */ ;\
sub tmp3, tmp1, tmp3 ;\
ba,pt %icc, 1f ;\
nop ;\
/* \
* flushtype = FLUSHMATCH_TYPE \
* arg1 = tag to compare against \
* tmp1 = cache line size \
* tmp3 = cache size \
* arg2 = counter \
* tmp2 = cache tag \
*/ \
3: \
sub tmp3, tmp1, arg2 ;\
4: \
ldxa [arg2]ASI_DC_TAG, tmp2 /* read tag */ ;\
btst CHEETAH_DC_VBIT_MASK, tmp2 ;\
bz,pn %icc, 5f /* br if no valid sub-blocks */ ;\
andn tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* clear out v bits */ ;\
cmp tmp2, arg1 ;\
bne,pn %icc, 5f /* branch if tag miss */ ;\
nop ;\
stxa %g0, [arg2]ASI_DC_TAG ;\
membar #Sync ;\
5: \
cmp %g0, arg2 ;\
bne,pt %icc, 4b /* branch if not done */ ;\
sub arg2, tmp1, arg2 ;\
1:
/* END CSTYLED */
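/*
* For reference, the three flush strategies selected by dflush_type in the
* macro above can be sketched in C roughly as follows. This is an
* illustrative sketch only, assuming the D$ is enabled (the macro checks
* DCU_DC first); dc_tag_invalidate(), dc_phys_invalidate() and
* dc_tag_read() are hypothetical stand-ins for the ASI_DC_TAG and
* ASI_DC_INVAL diagnostic accesses the macro actually performs, off is a
* signed offset, and the final else branch is FLUSHALL_TYPE.
*
*	if (dflush_type == FLUSHPAGE_TYPE) {
*		pa = (uint64_t)pfn << MMU_PAGESHIFT;
*		for (off = MMU_PAGESIZE - linesize; off >= 0; off -= linesize)
*			dc_phys_invalidate(pa + off);
*	} else if (dflush_type == FLUSHMATCH_TYPE) {
*		tag = (uint64_t)pfn << CHEETAH_DC_VBIT_SHIFT;
*		for (off = dcache_size - linesize; off >= 0; off -= linesize) {
*			t = dc_tag_read(off);
*			if ((t & CHEETAH_DC_VBIT_MASK) != 0 &&
*			    (t & ~CHEETAH_DC_VBIT_MASK) == tag)
*				dc_tag_invalidate(off);
*		}
*	} else {
*		for (off = dcache_size - linesize; off >= 0; off -= linesize)
*			dc_tag_invalidate(off);
*	}
*/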
#endif /* !lint */
/*
* Cheetah MMU and Cache operations.
*/
#if defined(lint)
/* ARGSUSED */
void
vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
{}
#else /* lint */
ENTRY_NP(vtag_flushpage)
/*
* flush page from the tlb
*
* %o0 = vaddr
* %o1 = sfmmup
*/
rdpr %pstate, %o5
#ifdef DEBUG
PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
#endif /* DEBUG */
/*
* disable ints
*/
andn %o5, PSTATE_IE, %o4
wrpr %o4, 0, %pstate
/*
* Then, blow out the tlb
* Interrupts are disabled to prevent the primary ctx register
* from changing underneath us.
*/
sethi %hi(ksfmmup), %o3
ldx [%o3 + %lo(ksfmmup)], %o3
cmp %o3, %o1
bne,pt %xcc, 1f ! if not kernel as, go to 1
sethi %hi(FLUSH_ADDR), %o3
/*
* For Kernel demaps use primary. type = page implicitly
*/
stxa %g0, [%o0]ASI_DTLB_DEMAP /* dmmu flush for KCONTEXT */
stxa %g0, [%o0]ASI_ITLB_DEMAP /* immu flush for KCONTEXT */
flush %o3
retl
wrpr %g0, %o5, %pstate /* enable interrupts */
1:
/*
* User demap. We need to set the primary context properly.
* Secondary context cannot be used for Cheetah IMMU.
* %o0 = vaddr
* %o1 = sfmmup
* %o3 = FLUSH_ADDR
*/
SFMMU_CPU_CNUM(%o1, %g1, %g2) ! %g1 = sfmmu cnum on this CPU
ldub [%o1 + SFMMU_CEXT], %o4 ! %o4 = sfmmup->sfmmu_cext
sll %o4, CTXREG_EXT_SHIFT, %o4
or %g1, %o4, %g1 ! %g1 = primary pgsz | cnum
wrpr %g0, 1, %tl
set MMU_PCONTEXT, %o4
or DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0
ldxa [%o4]ASI_DMMU, %o2 ! %o2 = save old ctxnum
srlx %o2, CTXREG_NEXT_SHIFT, %o1 ! need to preserve nucleus pgsz
sllx %o1, CTXREG_NEXT_SHIFT, %o1 ! %o1 = nucleus pgsz
or %g1, %o1, %g1 ! %g1 = nucleus pgsz | primary pgsz | cnum
stxa %g1, [%o4]ASI_DMMU ! wr new ctxnum
stxa %g0, [%o0]ASI_DTLB_DEMAP
stxa %g0, [%o0]ASI_ITLB_DEMAP
stxa %o2, [%o4]ASI_DMMU /* restore old ctxnum */
flush %o3
wrpr %g0, 0, %tl
retl
wrpr %g0, %o5, %pstate /* enable interrupts */
SET_SIZE(vtag_flushpage)
#endif /* lint */
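/*
* For illustration, the user demap path above corresponds roughly to the
* following C sketch. mmu_pcontext_read(), mmu_pcontext_write() and
* tlb_demap_page() are hypothetical stand-ins for the MMU_PCONTEXT and
* ASI_DTLB_DEMAP/ASI_ITLB_DEMAP accesses; the TL manipulation and the
* flush are omitted. The high-order (nucleus page size) bits of the old
* context register are preserved in the new value.
*
*	new = SFMMU_CPU_CNUM(sfmmup) |
*	    (sfmmup->sfmmu_cext << CTXREG_EXT_SHIFT);
*	old = mmu_pcontext_read();
*	new |= old & ~((1ULL << CTXREG_NEXT_SHIFT) - 1);
*	mmu_pcontext_write(new);
*	tlb_demap_page(vaddr | DEMAP_PRIMARY | DEMAP_PAGE_TYPE);
*	mmu_pcontext_write(old);
*/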
#if defined(lint)
void
vtag_flushall(void)
{}
#else /* lint */
ENTRY_NP2(vtag_flushall, demap_all)
/*
* flush the tlb
*/
sethi %hi(FLUSH_ADDR), %o3
set DEMAP_ALL_TYPE, %g1
stxa %g0, [%g1]ASI_DTLB_DEMAP
stxa %g0, [%g1]ASI_ITLB_DEMAP
flush %o3
retl
nop
SET_SIZE(demap_all)
SET_SIZE(vtag_flushall)
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
vtag_flushpage_tl1(uint64_t vaddr, uint64_t sfmmup)
{}
#else /* lint */
ENTRY_NP(vtag_flushpage_tl1)
/*
* x-trap to flush page from tlb and tsb
*
* %g1 = vaddr, zero-extended on 32-bit kernel
* %g2 = sfmmup
*
* assumes TSBE_TAG = 0
*/
srln %g1, MMU_PAGESHIFT, %g1
sethi %hi(ksfmmup), %g3
ldx [%g3 + %lo(ksfmmup)], %g3
cmp %g3, %g2
bne,pt %xcc, 1f ! if not kernel as, go to 1
slln %g1, MMU_PAGESHIFT, %g1 /* g1 = vaddr */
/* We need to demap in the kernel context */
or DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
stxa %g0, [%g1]ASI_DTLB_DEMAP
stxa %g0, [%g1]ASI_ITLB_DEMAP
retry
1:
/* We need to demap in a user context */
or DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
SFMMU_CPU_CNUM(%g2, %g6, %g3) ! %g6 = sfmmu cnum on this CPU
ldub [%g2 + SFMMU_CEXT], %g4 ! %g4 = sfmmup->cext
sll %g4, CTXREG_EXT_SHIFT, %g4
or %g6, %g4, %g6 ! %g6 = pgsz | cnum
set MMU_PCONTEXT, %g4
ldxa [%g4]ASI_DMMU, %g5 /* rd old ctxnum */
srlx %g5, CTXREG_NEXT_SHIFT, %g2 /* %g2 = nucleus pgsz */
sllx %g2, CTXREG_NEXT_SHIFT, %g2 /* preserve nucleus pgsz */
or %g6, %g2, %g6 /* %g6 = nucleus pgsz | primary pgsz | cnum */
stxa %g6, [%g4]ASI_DMMU /* wr new ctxnum */
stxa %g0, [%g1]ASI_DTLB_DEMAP
stxa %g0, [%g1]ASI_ITLB_DEMAP
stxa %g5, [%g4]ASI_DMMU /* restore old ctxnum */
retry
SET_SIZE(vtag_flushpage_tl1)
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t sfmmup_pgcnt)
{}
#else /* lint */
ENTRY_NP(vtag_flush_pgcnt_tl1)
/*
* x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
*
* %g1 = vaddr, zero-extended on 32-bit kernel
* %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
*
* NOTE: this handler relies on the fact that no
* interrupts or traps can occur during the loop
* issuing the TLB_DEMAP operations. It is assumed
* that interrupts are disabled and this code is
* fetching from the kernel locked text address.
*
* assumes TSBE_TAG = 0
*/
set SFMMU_PGCNT_MASK, %g4
and %g4, %g2, %g3 /* g3 = pgcnt - 1 */
add %g3, 1, %g3 /* g3 = pgcnt */
andn %g2, SFMMU_PGCNT_MASK, %g2 /* g2 = sfmmup */
srln %g1, MMU_PAGESHIFT, %g1
sethi %hi(ksfmmup), %g4
ldx [%g4 + %lo(ksfmmup)], %g4
cmp %g4, %g2
bne,pn %xcc, 1f /* if not kernel as, go to 1 */
slln %g1, MMU_PAGESHIFT, %g1 /* g1 = vaddr */
/* We need to demap in the kernel context */
or DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
set MMU_PAGESIZE, %g2 /* g2 = pgsize */
sethi %hi(FLUSH_ADDR), %g5
4:
stxa %g0, [%g1]ASI_DTLB_DEMAP
stxa %g0, [%g1]ASI_ITLB_DEMAP
flush %g5 ! flush required by immu
deccc %g3 /* decr pgcnt */
bnz,pt %icc,4b
add %g1, %g2, %g1 /* next page */
retry
1:
/*
* We need to demap in a user context
*
* g2 = sfmmup
* g3 = pgcnt
*/
SFMMU_CPU_CNUM(%g2, %g5, %g6) ! %g5 = sfmmu cnum on this CPU
or DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
ldub [%g2 + SFMMU_CEXT], %g4 ! %g4 = sfmmup->cext
sll %g4, CTXREG_EXT_SHIFT, %g4
or %g5, %g4, %g5
set MMU_PCONTEXT, %g4
ldxa [%g4]ASI_DMMU, %g6 /* rd old ctxnum */
srlx %g6, CTXREG_NEXT_SHIFT, %g2 /* %g2 = nucleus pgsz */
sllx %g2, CTXREG_NEXT_SHIFT, %g2 /* preserve nucleus pgsz */
or %g5, %g2, %g5 /* %g5 = nucleus pgsz | primary pgsz | cnum */
stxa %g5, [%g4]ASI_DMMU /* wr new ctxnum */
set MMU_PAGESIZE, %g2 /* g2 = pgsize */
sethi %hi(FLUSH_ADDR), %g5
3:
stxa %g0, [%g1]ASI_DTLB_DEMAP
stxa %g0, [%g1]ASI_ITLB_DEMAP
flush %g5 ! flush required by immu
deccc %g3 /* decr pgcnt */
bnz,pt %icc,3b
add %g1, %g2, %g1 /* next page */
stxa %g6, [%g4]ASI_DMMU /* restore old ctxnum */
retry
SET_SIZE(vtag_flush_pgcnt_tl1)
#endif /* lint */
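/*
* The packed %g2 argument described above can be pictured in C as follows.
* Illustrative sketch only; demap_page() is a hypothetical stand-in for
* the ASI_DTLB_DEMAP/ASI_ITLB_DEMAP stores plus the required flush.
*
*	pgcnt = (g2 & SFMMU_PGCNT_MASK) + 1;
*	sfmmup = (sfmmu_t *)(g2 & ~SFMMU_PGCNT_MASK);
*	for (i = 0; i < pgcnt; i++, vaddr += MMU_PAGESIZE)
*		demap_page(vaddr);
*/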
#if defined(lint)
/*ARGSUSED*/
void
vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
{}
#else /* lint */
ENTRY_NP(vtag_flushall_tl1)
/*
* x-trap to flush tlb
*/
set DEMAP_ALL_TYPE, %g4
stxa %g0, [%g4]ASI_DTLB_DEMAP
stxa %g0, [%g4]ASI_ITLB_DEMAP
retry
SET_SIZE(vtag_flushall_tl1)
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
vac_flushpage(pfn_t pfnum, int vcolor)
{}
#else /* lint */
/*
* vac_flushpage(pfnum, color)
* Flush 1 8k page of the D-$ with physical page = pfnum
* Algorithm:
* The Cheetah dcache is a 64k pseudo 4-way set-associative cache.
* It is a virtually indexed, physically tagged cache.
*/
.seg ".data"
.align 8
.global dflush_type
dflush_type:
.word FLUSHPAGE_TYPE
ENTRY(vac_flushpage)
/*
* flush page from the d$
*
* %o0 = pfnum, %o1 = color
*/
DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
retl
nop
SET_SIZE(vac_flushpage)
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
{}
#else /* lint */
ENTRY_NP(vac_flushpage_tl1)
/*
* x-trap to flush page from the d$
*
* %g1 = pfnum, %g2 = color
*/
DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
retry
SET_SIZE(vac_flushpage_tl1)
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
vac_flushcolor(int vcolor, pfn_t pfnum)
{}
#else /* lint */
/*
* In UltraSPARC III, flushcolor is the same as flushpage.
* This is because we have an ASI to flush the dcache using a physical
* address.
* Flushing the dcache by physical address is faster because we
* don't have to deal with the associativity of the dcache.
* The arguments to vac_flushpage() and vac_flushcolor() are the same but
* the order is reversed. This is because we maintain compatibility
* with Spitfire, in which vac_flushcolor has only one argument, namely
* vcolor.
*/
ENTRY(vac_flushcolor)
/*
* %o0 = vcolor, %o1 = pfnum
*/
DCACHE_FLUSHPAGE(%o1, %o0, %o2, %o3, %o4)
retl
nop
SET_SIZE(vac_flushcolor)
#endif /* lint */
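/*
* For illustration, the two routines take the same arguments in swapped
* order but flush the same physical page from the D$ (hypothetical call
* sites, not taken from actual callers):
*
*	vac_flushpage(pfnum, vcolor);
*	vac_flushcolor(vcolor, pfnum);
*/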
#if defined(lint)
/* ARGSUSED */
void
vac_flushcolor_tl1(uint64_t vcolor, uint64_t pfnum)
{}
#else /* lint */
ENTRY(vac_flushcolor_tl1)
/*
* %g1 = vcolor
* %g2 = pfnum
*/
DCACHE_FLUSHPAGE(%g2, %g1, %g3, %g4, %g5)
retry
SET_SIZE(vac_flushcolor_tl1)
#endif /* lint */
#if defined(lint)
int
idsr_busy(void)
{
return (0);
}
#else /* lint */
/*
* Determine whether or not the IDSR is busy.
* Entry: no arguments
* Returns: 1 if busy, 0 otherwise
*/
ENTRY(idsr_busy)
ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %g1
clr %o0
btst IDSR_BUSY, %g1
bz,a,pt %xcc, 1f
mov 1, %o0
1:
retl
nop
SET_SIZE(idsr_busy)
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{}
/* ARGSUSED */
void
init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{}
#else /* lint */
.global _dispatch_status_busy
_dispatch_status_busy:
.asciz "ASI_INTR_DISPATCH_STATUS error: busy"
.align 4
/*
* Set up the interrupt dispatch data registers
* Entry:
* %o0 - function or inumber to call
* %o1, %o2 - arguments (2 uint64_t's)
*/
.seg "text"
ENTRY(init_mondo)
#ifdef DEBUG
!
! IDSR should not be busy at the moment
!
ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %g1
btst IDSR_BUSY, %g1
bz,pt %xcc, 1f
nop
sethi %hi(_dispatch_status_busy), %o0
call panic
or %o0, %lo(_dispatch_status_busy), %o0
#endif /* DEBUG */
ALTENTRY(init_mondo_nocheck)
!
! interrupt vector dispatch data reg 0
!
1:
mov IDDR_0, %g1
mov IDDR_1, %g2
mov IDDR_2, %g3
stxa %o0, [%g1]ASI_INTR_DISPATCH
!
! interrupt vector dispatch data reg 1
!
stxa %o1, [%g2]ASI_INTR_DISPATCH
!
! interrupt vector dispatch data reg 2
!
stxa %o2, [%g3]ASI_INTR_DISPATCH
membar #Sync
retl
nop
SET_SIZE(init_mondo_nocheck)
SET_SIZE(init_mondo)
#endif /* lint */
#if !(defined(JALAPENO) || defined(SERRANO))
#if defined(lint)
/* ARGSUSED */
void
shipit(int upaid, int bn)
{ return; }
#else /* lint */
/*
* Ship mondo to aid using busy/nack pair bn
*/
ENTRY_NP(shipit)
sll %o0, IDCR_PID_SHIFT, %g1 ! IDCR<18:14> = agent id
sll %o1, IDCR_BN_SHIFT, %g2 ! IDCR<28:24> = b/n pair
or %g1, IDCR_OFFSET, %g1 ! IDCR<13:0> = 0x70
or %g1, %g2, %g1
stxa %g0, [%g1]ASI_INTR_DISPATCH ! interrupt vector dispatch
membar #Sync
retl
nop
SET_SIZE(shipit)
#endif /* lint */
#endif /* !(JALAPENO || SERRANO) */
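/*
* The interrupt dispatch address composed in shipit() above amounts to the
* following C sketch (illustrative only; intr_dispatch_write() is a
* hypothetical stand-in for the ASI_INTR_DISPATCH store plus membar):
*
*	idcr = ((uint64_t)upaid << IDCR_PID_SHIFT) |
*	    ((uint64_t)bn << IDCR_BN_SHIFT) | IDCR_OFFSET;
*	intr_dispatch_write(idcr);
*/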
#if defined(lint)
/* ARGSUSED */
void
flush_instr_mem(caddr_t vaddr, size_t len)
{}
#else /* lint */
/*
* flush_instr_mem:
* Flush 1 page of the I-$ starting at vaddr
* %o0 vaddr
* %o1 bytes to be flushed
* UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
* the stores from all processors, so a FLUSH instruction is only needed
* to ensure the pipeline is consistent. This means a single flush is
* sufficient at the end of a sequence of stores that updates the
* instruction stream to ensure correct operation.
*/
ENTRY(flush_instr_mem)
flush %o0 ! address irrelevant
retl
nop
SET_SIZE(flush_instr_mem)
#endif /* lint */
#if defined(CPU_IMP_ECACHE_ASSOC)
#if defined(lint)
/* ARGSUSED */
uint64_t
get_ecache_ctrl(void)
{ return (0); }
#else /* lint */
ENTRY(get_ecache_ctrl)
GET_CPU_IMPL(%o0)
cmp %o0, JAGUAR_IMPL
!
! Putting an ASI access in the delay slot may
! cause it to be accessed, even when annulled.
!
bne 1f
nop
ldxa [%g0]ASI_EC_CFG_TIMING, %o0 ! read Jaguar shared E$ ctrl reg
b 2f
nop
1:
ldxa [%g0]ASI_EC_CTRL, %o0 ! read Ch/Ch+ E$ control reg
2:
retl
nop
SET_SIZE(get_ecache_ctrl)
#endif /* lint */
#endif /* CPU_IMP_ECACHE_ASSOC */
#if !(defined(JALAPENO) || defined(SERRANO))
/*
* flush_ecache:
* %o0 - 64 bit physical address
* %o1 - ecache size
* %o2 - ecache linesize
*/
#if defined(lint)
/*ARGSUSED*/
void
flush_ecache(uint64_t physaddr, size_t ecache_size, size_t ecache_linesize)
{}
#else /* !lint */
ENTRY(flush_ecache)
/*
* For certain CPU implementations, we have to flush the L2 cache
* before flushing the ecache.
*/
PN_L2_FLUSHALL(%g3, %g4, %g5)
/*
* Flush the entire Ecache using displacement flush.
*/
ECACHE_FLUSHALL(%o1, %o2, %o0, %o4)
retl
nop
SET_SIZE(flush_ecache)
#endif /* lint */
#endif /* !(JALAPENO || SERRANO) */
#if defined(lint)
void
flush_dcache(void)
{}
#else /* lint */
ENTRY(flush_dcache)
ASM_LD(%o0, dcache_size)
ASM_LD(%o1, dcache_linesize)
CH_DCACHE_FLUSHALL(%o0, %o1, %o2)
retl
nop
SET_SIZE(flush_dcache)
#endif /* lint */
#if defined(lint)
void
flush_icache(void)
{}
#else /* lint */
ENTRY(flush_icache)
GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1);
ld [%o0 + CHPR_ICACHE_LINESIZE], %o1
ba,pt %icc, 2f
ld [%o0 + CHPR_ICACHE_SIZE], %o0
flush_icache_1:
ASM_LD(%o0, icache_size)
ASM_LD(%o1, icache_linesize)
2:
CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
retl
nop
SET_SIZE(flush_icache)
#endif /* lint */
#if defined(lint)
/*ARGSUSED*/
void
kdi_flush_idcache(int dcache_size, int dcache_lsize, int icache_size,
int icache_lsize)
{
}
#else /* lint */
ENTRY(kdi_flush_idcache)
CH_DCACHE_FLUSHALL(%o0, %o1, %g1)
CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2)
membar #Sync
retl
nop
SET_SIZE(kdi_flush_idcache)
#endif /* lint */
#if defined(lint)
void
flush_pcache(void)
{}
#else /* lint */
ENTRY(flush_pcache)
PCACHE_FLUSHALL(%o0, %o1, %o2)
retl
nop
SET_SIZE(flush_pcache)
#endif /* lint */
#if defined(CPU_IMP_L1_CACHE_PARITY)
#if defined(lint)
/* ARGSUSED */
void
get_dcache_dtag(uint32_t dcache_idx, uint64_t *data)
{}
#else /* lint */
/*
* Get dcache data and tag. The data argument is a pointer to a ch_dc_data_t
* structure (see cheetahregs.h).
* The Dcache *should* be turned off when this code is executed.
*/
.align 128
ENTRY(get_dcache_dtag)
rdpr %pstate, %o5
andn %o5, PSTATE_IE | PSTATE_AM, %o3
wrpr %g0, %o3, %pstate
b 1f
stx %o0, [%o1 + CH_DC_IDX]
.align 128
1:
ldxa [%o0]ASI_DC_TAG, %o2
stx %o2, [%o1 + CH_DC_TAG]
membar #Sync
ldxa [%o0]ASI_DC_UTAG, %o2
membar #Sync
stx %o2, [%o1 + CH_DC_UTAG]
ldxa [%o0]ASI_DC_SNP_TAG, %o2
stx %o2, [%o1 + CH_DC_SNTAG]
add %o1, CH_DC_DATA, %o1
clr %o3
2:
membar #Sync ! required before ASI_DC_DATA
ldxa [%o0 + %o3]ASI_DC_DATA, %o2
membar #Sync ! required after ASI_DC_DATA
stx %o2, [%o1 + %o3]
cmp %o3, CH_DC_DATA_REG_SIZE - 8
blt 2b
add %o3, 8, %o3
/*
* Unlike other CPUs in the family, D$ data parity bits for Panther
* do not reside in the microtag. Instead, we have to read them
* using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead
* of just having 8 parity bits to protect all 32 bytes of data
* per line, we now have 32 bits of parity.
*/
GET_CPU_IMPL(%o3)
cmp %o3, PANTHER_IMPL
bne 4f
clr %o3
/*
* move our pointer to the next field where we store parity bits
* and add the offset of the last parity byte since we will be
* storing all 4 parity bytes within one 64 bit field like this:
*
* +------+------------+------------+------------+------------+
* | - | DC_parity | DC_parity | DC_parity | DC_parity |
* | - | for word 3 | for word 2 | for word 1 | for word 0 |
* +------+------------+------------+------------+------------+
* 63:32 31:24 23:16 15:8 7:0
*/
add %o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1
/* add the DC_data_parity bit into our working index */
mov 1, %o2
sll %o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
or %o0, %o2, %o0
3:
membar #Sync ! required before ASI_DC_DATA
ldxa [%o0 + %o3]ASI_DC_DATA, %o2
membar #Sync ! required after ASI_DC_DATA
stb %o2, [%o1]
dec %o1
cmp %o3, CH_DC_DATA_REG_SIZE - 8
blt 3b
add %o3, 8, %o3
4:
retl
wrpr %g0, %o5, %pstate
SET_SIZE(get_dcache_dtag)
#endif /* lint */
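/*
* On Panther, the second loop above packs one parity byte per data word
* into a single 64-bit field, as shown in the diagram in the comment; the
* result lands in the CH_DC_PN_DATA_PARITY field of the buffer. A rough C
* sketch, illustrative only; dc_data_parity_read() is a hypothetical
* stand-in for the ASI_DC_DATA access with the PN_DC_DATA_PARITY_BIT_SHIFT
* bit set in the index.
*
*	parity = 0;
*	for (word = 0; word < 4; word++)
*		parity |= (uint64_t)(dc_data_parity_read(idx, word) & 0xff) <<
*		    (word * 8);
*/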
#if defined(lint)
/* ARGSUSED */
void
get_icache_dtag(uint32_t ecache_idx, uint64_t *data)
{}
#else /* lint */
/*
* Get icache data and tag. The data argument is a pointer to a ch_ic_data_t
* structure (see cheetahregs.h):
* The Icache *Must* be turned off when this function is called.
* This is because diagnostic accesses to the Icache interfere with cache
* consistency.
*/
.align 128
ENTRY(get_icache_dtag)
rdpr %pstate, %o5
andn %o5, PSTATE_IE | PSTATE_AM, %o3
wrpr %g0, %o3, %pstate
stx %o0, [%o1 + CH_IC_IDX]
ldxa [%o0]ASI_IC_TAG, %o2
stx %o2, [%o1 + CH_IC_PATAG]
add %o0, CH_ICTAG_UTAG, %o0
ldxa [%o0]ASI_IC_TAG, %o2
add %o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0
stx %o2, [%o1 + CH_IC_UTAG]
ldxa [%o0]ASI_IC_TAG, %o2
add %o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0
stx %o2, [%o1 + CH_IC_UPPER]
ldxa [%o0]ASI_IC_TAG, %o2
andn %o0, CH_ICTAG_TMASK, %o0
stx %o2, [%o1 + CH_IC_LOWER]
ldxa [%o0]ASI_IC_SNP_TAG, %o2
stx %o2, [%o1 + CH_IC_SNTAG]
add %o1, CH_IC_DATA, %o1
clr %o3
2:
ldxa [%o0 + %o3]ASI_IC_DATA, %o2
stx %o2, [%o1 + %o3]
cmp %o3, PN_IC_DATA_REG_SIZE - 8
blt 2b
add %o3, 8, %o3
retl
wrpr %g0, %o5, %pstate
SET_SIZE(get_icache_dtag)
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
get_pcache_dtag(uint32_t pcache_idx, uint64_t *data)
{}
#else /* lint */
/*
* Get pcache data and tags.
* inputs:
* pcache_idx - fully constructed VA for accessing P$ diagnostic
* registers. Contains PC_way and PC_addr shifted into
* the correct bit positions. See the PRM for more details.
* data - pointer to a ch_pc_data_t
* structure (see cheetahregs.h):
*/
.align 128
ENTRY(get_pcache_dtag)
rdpr %pstate, %o5
andn %o5, PSTATE_IE | PSTATE_AM, %o3
wrpr %g0, %o3, %pstate
stx %o0, [%o1 + CH_PC_IDX]
ldxa [%o0]ASI_PC_STATUS_DATA, %o2
stx %o2, [%o1 + CH_PC_STATUS]
ldxa [%o0]ASI_PC_TAG, %o2
stx %o2, [%o1 + CH_PC_TAG]
ldxa [%o0]ASI_PC_SNP_TAG, %o2
stx %o2, [%o1 + CH_PC_SNTAG]
add %o1, CH_PC_DATA, %o1
clr %o3
2:
ldxa [%o0 + %o3]ASI_PC_DATA, %o2
stx %o2, [%o1 + %o3]
cmp %o3, CH_PC_DATA_REG_SIZE - 8
blt 2b
add %o3, 8, %o3
retl
wrpr %g0, %o5, %pstate
SET_SIZE(get_pcache_dtag)
#endif /* lint */
#endif /* CPU_IMP_L1_CACHE_PARITY */
#if defined(lint)
/* ARGSUSED */
void
set_dcu(uint64_t dcu)
{}
#else /* lint */
/*
* re-enable the i$, d$, w$, and p$ according to bootup cache state.
* Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
* %o0 - 64 bit constant
*/
ENTRY(set_dcu)
stxa %o0, [%g0]ASI_DCU ! Store to DCU
flush %g0 /* flush required after changing the IC bit */
retl
nop
SET_SIZE(set_dcu)
#endif /* lint */
#if defined(lint)
uint64_t
get_dcu(void)
{
return ((uint64_t)0);
}
#else /* lint */
/*
* Return DCU register.
*/
ENTRY(get_dcu)
ldxa [%g0]ASI_DCU, %o0 /* DCU control register */
retl
nop
SET_SIZE(get_dcu)
#endif /* lint */
/*
* Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
*
* This handler is used to check for softints generated by error trap
* handlers to report errors. On Cheetah, this mechanism is used by the
* Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
* ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
ch_pil15_interrupt_instr(void)
{}
#else /* lint */
ENTRY_NP(ch_pil15_interrupt_instr)
ASM_JMP(%g1, ch_pil15_interrupt)
SET_SIZE(ch_pil15_interrupt_instr)
#endif
#if defined(lint)
void
ch_pil15_interrupt(void)
{}
#else /* lint */
ENTRY_NP(ch_pil15_interrupt)
/*
* Since pil_interrupt is hacked to assume that every level 15
* interrupt is generated by the CPU to indicate a performance
* counter overflow, this gets ugly. Before calling pil_interrupt
* the Error at TL>0 pending status is inspected. If it is
* non-zero, then an error has occurred and it is handled.
* Otherwise control is transferred to pil_interrupt. Note that if
* an error is detected pil_interrupt will not be called and
* overflow interrupts may be lost causing erroneous performance
* measurements. However, error-recovery will have a detrimental
* effect on performance anyway.
*/
CPU_INDEX(%g1, %g4)
set ch_err_tl1_pending, %g4
ldub [%g1 + %g4], %g2
brz %g2, 1f
nop
/*
* We have a pending TL>0 error, clear the TL>0 pending status.
*/
stb %g0, [%g1 + %g4]
/*
* Clear the softint.
*/
mov 1, %g5
sll %g5, PIL_15, %g5
wr %g5, CLEAR_SOFTINT
/*
* For Cheetah*, call cpu_tl1_error via systrap at PIL 15
* to process the Fast ECC/Cache Parity at TL>0 error. Clear
* panic flag (%g2).
*/
set cpu_tl1_error, %g1
clr %g2
ba sys_trap
mov PIL_15, %g4
1:
/*
* The logout is invalid.
*
* Call the default interrupt handler.
*/
sethi %hi(pil_interrupt), %g1
jmp %g1 + %lo(pil_interrupt)
mov PIL_15, %g4
SET_SIZE(ch_pil15_interrupt)
#endif
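/*
* The dispatch decision made by ch_pil15_interrupt above is, in effect
* (pseudocode sketch only; clear_softint, sys_trap and pil_interrupt are
* shown as simple calls):
*
*	if (ch_err_tl1_pending[CPU]) {
*		ch_err_tl1_pending[CPU] = 0;
*		clear_softint(1 << PIL_15);
*		sys_trap(cpu_tl1_error, PIL_15);
*	} else {
*		pil_interrupt(PIL_15);
*	}
*/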
/*
* Error Handling
*
* Cheetah provides error checking for all memory access paths between
* the CPU, External Cache, Cheetah Data Switch and system bus. Error
* information is logged in the AFSR (also the AFSR_EXT for Panther) and
* the AFAR, and one of the following traps is generated (provided that it
* is enabled in the External Cache Error Enable Register) to handle that
* error:
* 1. trap 0x70: Precise trap
* tt0_fecc for errors at trap level(TL)>=0
* 2. trap 0x0A and 0x32: Deferred trap
* async_err for errors at TL>=0
* 3. trap 0x63: Disrupting trap
* ce_err for errors at TL=0
* (Note that trap 0x63 cannot happen at trap level > 0)
*
* Trap level one handlers panic the system except for the fast ecc
* error handler which tries to recover from certain errors.
*/
/*
* FAST ECC TRAP STRATEGY:
*
* Software must handle single and multi bit errors which occur due to data
* or instruction cache reads from the external cache. A single or multi bit
* error occurring in one of these situations results in a precise trap.
*
* The basic flow of this trap handler is as follows:
*
* 1) Record the state and then turn off the Dcache and Icache. The Dcache
* is disabled because bad data could have been installed. The Icache is
* turned off because we want to capture the Icache line related to the
* AFAR.
* 2) Disable trapping on CEEN/NCEEN errors during TL=0 processing.
* 3) Park sibling core if caches are shared (to avoid race condition while
* accessing shared resources such as L3 data staging register during
* CPU logout).
* 4) Read the AFAR and AFSR.
* 5) If CPU logout structure is not being used, then:
* 6) Clear all errors from the AFSR.
* 7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
* 8) Flush Ecache then Flush Dcache and Icache and restore to previous
* state.
* 9) Unpark sibling core if we parked it earlier.
* 10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
* running at PIL 15.
* 6) Otherwise, if CPU logout structure is being used:
* 7) Increment the "logout busy count".
* 8) Flush Ecache then Flush Dcache and Icache and restore to previous
* state.
* 9) Unpark sibling core if we parked it earlier.
* 10) Issue a retry since the other CPU error logging code will end up
* finding this error bit and logging information about it later.
* 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
* yet initialized such that we can't even check the logout struct, then
* we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
* call cpu_fast_ecc_error via systrap. The clo_flags parameter is used
* to determine information such as TL, TT, CEEN and NCEEN settings, etc
* in the high level trap handler since we don't have access to detailed
* logout information in cases where the cpu_private struct is not yet
* initialized.
*
* We flush the E$ and D$ here on TL=1 code to prevent getting nested
* Fast ECC traps in the TL=0 code. If we get a Fast ECC event here in
* the TL=1 code, we will go to the Fast ECC at TL>0 handler which,
* since it uses different code/data from this handler, has a better
* chance of fixing things up than simply recursing through this code
* again (this would probably cause an eventual kernel stack overflow).
* If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
* can flush the E$ (or the error is a stuck-at bit), we will recurse in
* the Fast ECC at TL>0 handler and eventually Red Mode.
*
* Note that for Cheetah (and only Cheetah), we use alias addresses for
* flushing rather than ASI accesses (which don't exist on Cheetah).
* Should we encounter a Fast ECC error within this handler on Cheetah,
* there's a good chance it's within the ecache_flushaddr buffer (since
* it's the largest piece of memory we touch in the handler and it is
* usually kernel text/data). For that reason the Fast ECC at TL>0
* handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
*/
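/*
* Condensed, the TL=0 fast ECC handling described above is (pseudocode
* sketch only, not the handler itself; capture_cpu_logout() stands in for
* the DO_CPU_LOGOUT macro and curpil for the current PIL):
*
*	save DCUCR, turn off D$ and I$, disable CEEN/NCEEN;
*	park sibling core if caches are shared;
*	busy = capture_cpu_logout();
*	flush E$ (and Panther L2), then D$ and I$; restore DCUCR; unpark;
*	if (busy == 0)
*		sys_trap(cpu_fast_ecc_error, curpil < PIL_14 ? PIL_14 : curpil);
*	else if (busy < CLO_NESTING_MAX)
*		retry();
*	else
*		ptl1_panic(PTL1_BAD_ECC);
*/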
/*
* Cheetah ecc-protected E$ trap (Trap 70) at TL=0
* tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
* architecture-specific files.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
fecc_err_instr(void)
{}
#else /* lint */
ENTRY_NP(fecc_err_instr)
membar #Sync ! Cheetah requires membar #Sync
/*
* Save current DCU state. Turn off the Dcache and Icache.
*/
ldxa [%g0]ASI_DCU, %g1 ! save DCU in %g1
andn %g1, DCU_DC + DCU_IC, %g4
stxa %g4, [%g0]ASI_DCU
flush %g0 /* flush required after changing the IC bit */
ASM_JMP(%g4, fast_ecc_err)
SET_SIZE(fecc_err_instr)
#endif /* lint */
#if !(defined(JALAPENO) || defined(SERRANO))
#if defined(lint)
void
fast_ecc_err(void)
{}
#else /* lint */
.section ".text"
.align 64
ENTRY_NP(fast_ecc_err)
/*
* Turn off CEEN and NCEEN.
*/
ldxa [%g0]ASI_ESTATE_ERR, %g3
andn %g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
stxa %g4, [%g0]ASI_ESTATE_ERR
membar #Sync ! membar sync required
/*
* Check to see whether we need to park our sibling core
* before recording diagnostic information from caches
* which may be shared by both cores.
* We use %g1 to store information about whether or not
* we had to park the core (%g1 holds our DCUCR value and
* we only use bits from that register which are "reserved"
* to keep track of core parking) so that we know whether
* or not to unpark later. %g5 and %g4 are scratch registers.
*/
PARK_SIBLING_CORE(%g1, %g5, %g4)
/*
* Do the CPU log out capture.
* %g3 = "failed?" return value.
* %g2 = Input = AFAR. Output the clo_flags info which is passed
* into this macro via %g4. Output only valid if cpu_private
* struct has not been initialized.
* CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
* %g4 = Trap information stored in the cpu logout flags field
* %g5 = scr1
* %g6 = scr2
* %g3 = scr3
* %g4 = scr4
*/
/* store the CEEN and NCEEN values, TL=0 */
and %g3, EN_REG_CEEN + EN_REG_NCEEN, %g4
set CHPR_FECCTL0_LOGOUT, %g6
DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
/*
* Flush the Ecache (and L2 cache for Panther) to get the error out
* of the Ecache. If the UCC or UCU is on a dirty line, then the
* following flush will turn that into a WDC or WDU, respectively.
*/
PN_L2_FLUSHALL(%g4, %g5, %g6)
CPU_INDEX(%g4, %g5)
mulx %g4, CPU_NODE_SIZE, %g4
set cpunodes, %g5
add %g4, %g5, %g4
ld [%g4 + ECACHE_LINESIZE], %g5
ld [%g4 + ECACHE_SIZE], %g4
ASM_LDX(%g6, ecache_flushaddr)
ECACHE_FLUSHALL(%g4, %g5, %g6, %g7)
/*
* Flush the Dcache. Since bad data could have been installed in
* the Dcache we must flush it before re-enabling it.
*/
ASM_LD(%g5, dcache_size)
ASM_LD(%g6, dcache_linesize)
CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
/*
* Flush the Icache. Since we turned off the Icache to capture the
* Icache line it is now stale or corrupted and we must flush it
* before re-enabling it.
*/
GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5);
ld [%g5 + CHPR_ICACHE_LINESIZE], %g6
ba,pt %icc, 6f
ld [%g5 + CHPR_ICACHE_SIZE], %g5
fast_ecc_err_5:
ASM_LD(%g5, icache_size)
ASM_LD(%g6, icache_linesize)
6:
CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
/*
* check to see whether we parked our sibling core at the start
* of this handler. If so, we need to unpark it here.
* We use DCUCR reserved bits (stored in %g1) to keep track of
* whether or not we need to unpark. %g5 and %g4 are scratch registers.
*/
UNPARK_SIBLING_CORE(%g1, %g5, %g4)
/*
* Restore the Dcache and Icache to the previous state.
*/
stxa %g1, [%g0]ASI_DCU
flush %g0 /* flush required after changing the IC bit */
/*
* Make sure our CPU logout operation was successful.
*/
cmp %g3, %g0
be 8f
nop
/*
* If the logout structure had been busy, how many times have
* we tried to use it and failed (nesting count)? If we have
* already recursed a substantial number of times, then we can
* assume things are not going to get better by themselves and
* so it would be best to panic.
*/
cmp %g3, CLO_NESTING_MAX
blt 7f
nop
call ptl1_panic
mov PTL1_BAD_ECC, %g1
7:
/*
* Otherwise, if the logout structure was busy but we have not
* nested more times than our maximum value, then we simply
* issue a retry. Our TL=0 trap handler code will check and
* clear the AFSR after it is done logging what is currently
* in the logout struct and handle this event at that time.
*/
retry
8:
/*
* Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
* already at PIL 15.
*/
set cpu_fast_ecc_error, %g1
rdpr %pil, %g4
cmp %g4, PIL_14
ba sys_trap
movl %icc, PIL_14, %g4
SET_SIZE(fast_ecc_err)
#endif /* lint */
#endif /* !(JALAPENO || SERRANO) */
/*
* Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
*
* The basic flow of this trap handler is as follows:
*
* 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
* software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
* will use to save %g1 and %g2.
* 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
* we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
* handler (using the just saved %g1).
* 3) Turn off the Dcache if it was on and save the state of the Dcache
* (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
* NB: we don't turn off the Icache because bad data is not installed nor
* will we be doing any diagnostic accesses.
* 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2
* 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
* %tpc, %tnpc, %tstate values previously saved).
* 6) set %tl to %tl - 1.
* 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
* 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
* 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear. For
* Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
* Save the values in ch_err_tl1_data. For Panther, read the shadow
* AFSR_EXT and save the value in ch_err_tl1_data.
* 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
* being queued. We'll report them via the AFSR/AFAR capture in step 13.
* 11) Flush the Ecache.
* NB: the Ecache is flushed assuming the largest possible size with
* the smallest possible line size since access to the cpu_nodes may
* cause an unrecoverable DTLB miss.
* 12) Reenable CEEN/NCEEN with the value saved from step 10.
* 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
* For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
* Save the read AFSR/AFAR values in ch_err_tl1_data. For Panther,
* read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
* 14) Flush and re-enable the Dcache if it was on at step 3.
* 15) Do TRAPTRACE if enabled.
* 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
* 17) Set the event pending flag in ch_err_tl1_pending[CPU]
* 18) Cause a softint 15. The pil15_interrupt handler will inspect the
* event pending flag and call cpu_tl1_error via systrap if set.
* 19) Restore the registers from step 5 and issue retry.
*/
/*
* Cheetah ecc-protected E$ trap (Trap 70) at TL>0
* tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
* architecture-specific files. This generates a "Software Trap 0" at TL>0,
* which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
fecc_err_tl1_instr(void)
{}
#else /* lint */
ENTRY_NP(fecc_err_tl1_instr)
CH_ERR_TL1_TRAPENTRY(SWTRAP_0);
SET_SIZE(fecc_err_tl1_instr)
#endif /* lint */
/*
* Software trap 0 at TL>0.
* tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
* the various architecture-specific files. This is used as a continuation
* of the fast ecc handling where we've bought an extra TL level, so we can
* use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
* and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
* there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
* order two bits from %g1 and %g2 respectively).
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
fecc_err_tl1_cont_instr(void)
{}
#else /* lint */
ENTRY_NP(fecc_err_tl1_cont_instr)
CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err)
SET_SIZE(fecc_err_tl1_cont_instr)
#endif /* lint */
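/*
* The %g1/%g2 save scheme described above can be pictured as follows
* (illustrative sketch only; the exact register-to-%tpc/%tnpc pairing is
* defined by CH_ERR_TL1_SWTRAPENTRY). Because %tpc and %tnpc cannot hold
* the two low-order bits of an arbitrary value, those bits are parked in
* the writable %tstate bits 0-1 and 8-9:
*
*	tpc = g1 & ~3;		tstate bits 1:0 = g1 & 3;
*	tnpc = g2 & ~3;		tstate bits 9:8 = g2 & 3;
*	and later, to restore:
*	g1 = tpc | (tstate & 3);
*	g2 = tnpc | ((tstate >> 8) & 3);
*/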
#if defined(lint)
void
ce_err(void)
{}
#else /* lint */
/*
* The ce_err function handles disrupting trap type 0x63 at TL=0.
*
* AFSR errors bits which cause this trap are:
* CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
*
* NCEEN Bit of Cheetah External Cache Error Enable Register enables
* the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
*
* CEEN Bit of Cheetah External Cache Error Enable Register enables
* the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
*
* Cheetah+ also handles (No additional processing required):
* DUE, DTO, DBERR (NCEEN controlled)
* THCE (CEEN and ET_ECC_en controlled)
* TUE (ET_ECC_en controlled)
*
* Panther further adds:
* IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled)
* IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled)
* TUE_SH, TUE (NCEEN and L2_tag_ECC_en controlled)
* L3_TUE, L3_TUE_SH (NCEEN and ET_ECC_en controlled)
* THCE (CEEN and L2_tag_ECC_en controlled)
* L3_THCE (CEEN and ET_ECC_en controlled)
*
* Steps:
* 1. Disable hardware corrected disrupting errors only (CEEN)
* 2. Park sibling core if caches are shared (to avoid race
* condition while accessing shared resources such as L3
* data staging register during CPU logout).
* 3. If the CPU logout structure is not currently being used:
* 4. Clear AFSR error bits
* 5. Capture Ecache, Dcache and Icache lines associated
* with AFAR.
* 6. Unpark sibling core if we parked it earlier.
* 7. call cpu_disrupting_error via sys_trap at PIL 14
* unless we're already running at PIL 15.
* 4. Otherwise, if the CPU logout structure is busy:
* 5. Increment "logout busy count" and place into %g3
* 6. Unpark sibling core if we parked it earlier.
* 7. Issue a retry since the other CPU error logging
* code will end up finding this error bit and logging
* information about it later.
* 5. Alternatively (to 3 and 4 above), if the cpu_private struct is
* not yet initialized such that we can't even check the logout
* struct, then we place the clo_flags data into %g2
* (sys_trap->have_win arg #1) and call cpu_disrupting_error via
* systrap. The clo_flags parameter is used to determine information
* such as TL, TT, CEEN settings, etc in the high level trap
* handler since we don't have access to detailed logout information
* in cases where the cpu_private struct is not yet initialized.
*
* %g3: [ logout busy count ] - arg #2
* %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
*/
.align 128
ENTRY_NP(ce_err)
membar #Sync ! Cheetah requires membar #Sync
/*
* Disable trap on hardware corrected errors (CEEN) while at TL=0
* to prevent recursion.
*/
ldxa [%g0]ASI_ESTATE_ERR, %g1
bclr EN_REG_CEEN, %g1
stxa %g1, [%g0]ASI_ESTATE_ERR
membar #Sync ! membar sync required
/*
* Save current DCU state. Turn off Icache to allow capture of
* Icache data by DO_CPU_LOGOUT.
*/
ldxa [%g0]ASI_DCU, %g1 ! save DCU in %g1
andn %g1, DCU_IC, %g4
stxa %g4, [%g0]ASI_DCU
flush %g0 /* flush required after changing the IC bit */
/*
* Check to see whether we need to park our sibling core
* before recording diagnostic information from caches
* which may be shared by both cores.
* We use %g1 to store information about whether or not
* we had to park the core (%g1 holds our DCUCR value and
* we only use bits from that register which are "reserved"
* to keep track of core parking) so that we know whether
* or not to unpark later. %g5 and %g4 are scratch registers.
*/
PARK_SIBLING_CORE(%g1, %g5, %g4)
/*
* Do the CPU log out capture.
* %g3 = "failed?" return value.
* %g2 = Input = AFAR. Output the clo_flags info which is passed
* into this macro via %g4. Output only valid if cpu_private
* struct has not been initialized.
* CHPR_CECC_LOGOUT = cpu logout structure offset input
* %g4 = Trap information stored in the cpu logout flags field
* %g5 = scr1
* %g6 = scr2
* %g3 = scr3
* %g4 = scr4
*/
clr %g4 ! TL=0 bit in afsr
set CHPR_CECC_LOGOUT, %g6
DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
/*
* Flush the Icache. Since we turned off the Icache to capture the
* Icache line it is now stale or corrupted and we must flush it
* before re-enabling it.
*/
GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1);
ld [%g5 + CHPR_ICACHE_LINESIZE], %g6
ba,pt %icc, 2f
ld [%g5 + CHPR_ICACHE_SIZE], %g5
ce_err_1:
ASM_LD(%g5, icache_size)
ASM_LD(%g6, icache_linesize)
2:
CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
/*
* check to see whether we parked our sibling core at the start
* of this handler. If so, we need to unpark it here.
* We use DCUCR reserved bits (stored in %g1) to keep track of
* whether or not we need to unpark. %g5 and %g4 are scratch registers.
*/
UNPARK_SIBLING_CORE(%g1, %g5, %g4)
/*
* Restore Icache to previous state.
*/
stxa %g1, [%g0]ASI_DCU
flush %g0 /* flush required after changing the IC bit */
/*
* Make sure our CPU logout operation was successful.
*/
cmp %g3, %g0
be 4f
nop
/*
* If the logout structure had been busy, how many times have
* we tried to use it and failed (nesting count)? If we have
* already recursed a substantial number of times, then we can
* assume things are not going to get better by themselves and
* so it would be best to panic.
*/
cmp %g3, CLO_NESTING_MAX
blt 3f
nop
call ptl1_panic
mov PTL1_BAD_ECC, %g1
3:
/*
* Otherwise, if the logout structure was busy but we have not
* nested more times than our maximum value, then we simply
* issue a retry. Our TL=0 trap handler code will check and
* clear the AFSR after it is done logging what is currently
* in the logout struct and handle this event at that time.
*/
retry
4:
/*
* Call cpu_disrupting_error via systrap at PIL 14 unless we're
* already at PIL 15.
*/
set cpu_disrupting_error, %g1
rdpr %pil, %g4
cmp %g4, PIL_14
ba sys_trap
movl %icc, PIL_14, %g4
SET_SIZE(ce_err)
#endif /* lint */
#if defined(lint)
/*
* This trap cannot happen at TL>0 which means this routine will never
* actually be called and so we treat this like a BAD TRAP panic.
*/
void
ce_err_tl1(void)
{}
#else /* lint */
.align 64
ENTRY_NP(ce_err_tl1)
call ptl1_panic
mov PTL1_BAD_TRAP, %g1
SET_SIZE(ce_err_tl1)
#endif /* lint */
#if defined(lint)
void
async_err(void)
{}
#else /* lint */
/*
* The async_err function handles deferred trap types 0xA
* (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
*
* AFSR errors bits which cause this trap are:
* UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
* On some platforms, EMU may cause Cheetah to pull the error pin,
* never giving Solaris a chance to take a trap.
*
* NCEEN Bit of Cheetah External Cache Error Enable Register enables
* the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
*
* Steps:
* 1. Disable CEEN and NCEEN errors to prevent recursive errors.
* 2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
* I$ line in DO_CPU_LOGOUT.
* 3. Park sibling core if caches are shared (to avoid race
* condition while accessing shared resources such as L3
* data staging register during CPU logout).
* 4. If the CPU logout structure is not currently being used:
* 5. Clear AFSR error bits
* 6. Capture Ecache, Dcache and Icache lines associated
* with AFAR.
* 7. Unpark sibling core if we parked it earlier.
* 8. call cpu_deferred_error via sys_trap.
* 5. Otherwise, if the CPU logout structure is busy:
* 6. Increment "logout busy count"
* 7. Unpark sibling core if we parked it earlier.
* 8. Issue a retry since the other CPU error logging
* code will end up finding this error bit and logging
* information about it later.
* 6. Alternatively (to 4 and 5 above), if the cpu_private struct is
* not yet initialized such that we can't even check the logout
* struct, then we place the clo_flags data into %g2
* (sys_trap->have_win arg #1) and call cpu_deferred_error via
* systrap. The clo_flags parameter is used to determine information
* such as TL, TT, CEEN settings, etc in the high level trap handler
* since we don't have access to detailed logout information in cases
* where the cpu_private struct is not yet initialized.
*
* %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
* %g3: [ logout busy count ] - arg #2
*/
ENTRY_NP(async_err)
membar #Sync ! Cheetah requires membar #Sync
/*
* Disable CEEN and NCEEN.
*/
ldxa [%g0]ASI_ESTATE_ERR, %g3
andn %g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
stxa %g4, [%g0]ASI_ESTATE_ERR
membar #Sync ! membar sync required
/*
* Save current DCU state.
* Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
* Do this regardless of whether this is a Data Access Error or
* Instruction Access Error Trap.
* Disable Dcache for both Data Access Error and Instruction Access
* Error per Cheetah PRM P.5 Note 6.
*/
ldxa [%g0]ASI_DCU, %g1 ! save DCU in %g1
andn %g1, DCU_IC + DCU_DC, %g4
stxa %g4, [%g0]ASI_DCU
flush %g0 /* flush required after changing the IC bit */
/*
* Check to see whether we need to park our sibling core
* before recording diagnostic information from caches
* which may be shared by both cores.
* We use %g1 to store information about whether or not
* we had to park the core (%g1 holds our DCUCR value and
* we only use bits from that register which are "reserved"
* to keep track of core parking) so that we know whether
* or not to unpark later. %g6 and %g4 are scratch registers.
*/
PARK_SIBLING_CORE(%g1, %g6, %g4)
/*
* Do the CPU logout capture.
*
* %g3 = "failed?" return value.
* %g2 = Input = AFAR. Output the clo_flags info which is passed
* into this macro via %g4. Output only valid if cpu_private
* struct has not been initialized.
* CHPR_ASYNC_LOGOUT = cpu logout structure offset input
* %g4 = Trap information stored in the cpu logout flags field
* %g5 = scr1
* %g6 = scr2
* %g3 = scr3
* %g4 = scr4
*/
andcc %g5, T_TL1, %g0
clr %g6
movnz %xcc, 1, %g6 ! set %g6 if T_TL1 set
sllx %g6, CLO_FLAGS_TL_SHIFT, %g6
sllx %g5, CLO_FLAGS_TT_SHIFT, %g4
set CLO_FLAGS_TT_MASK, %g2
and %g4, %g2, %g4 ! ttype
or %g6, %g4, %g4 ! TT and TL
and %g3, EN_REG_CEEN, %g3 ! CEEN value
or %g3, %g4, %g4 ! TT and TL and CEEN
set CHPR_ASYNC_LOGOUT, %g6
DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
/*
* If the logout struct was busy, we may need to pass the
* TT, TL, and CEEN information to the TL=0 handler via
* systrap parameter so save it off here.
*/
cmp %g3, %g0
be 1f
nop
sllx %g4, 32, %g4
or %g4, %g3, %g3
1:
/*
* Flush the Icache. Since we turned off the Icache to capture the
* Icache line it is now stale or corrupted and we must flush it
* before re-enabling it.
*/
GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1);
ld [%g5 + CHPR_ICACHE_LINESIZE], %g6
ba,pt %icc, 2f
ld [%g5 + CHPR_ICACHE_SIZE], %g5
async_err_1:
ASM_LD(%g5, icache_size)
ASM_LD(%g6, icache_linesize)
2:
CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
/*
* Flush the Dcache before turning it back on since it may now
* contain stale or corrupt data.
*/
ASM_LD(%g5, dcache_size)
ASM_LD(%g6, dcache_linesize)
CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
/*
* check to see whether we parked our sibling core at the start
* of this handler. If so, we need to unpark it here.
* We use DCUCR reserved bits (stored in %g1) to keep track of
* whether or not we need to unpark. %g5 and %g7 are scratch registers.
*/
UNPARK_SIBLING_CORE(%g1, %g5, %g7)
/*
* Restore Icache and Dcache to previous state.
*/
stxa %g1, [%g0]ASI_DCU
flush %g0 /* flush required after changing the IC bit */
/*
* Make sure our CPU logout operation was successful.
*/
cmp %g3, %g0
be 4f
nop
/*
* If the logout structure had been busy, how many times have
* we tried to use it and failed (nesting count)? If we have
* already recursed a substantial number of times, then we can
* assume things are not going to get better by themselves and
* so it would be best to panic.
*/
cmp %g3, CLO_NESTING_MAX
blt 3f
nop
call ptl1_panic
mov PTL1_BAD_ECC, %g1
3:
/*
* Otherwise, if the logout structure was busy but we have not
* nested more times than our maximum value, then we simply
* issue a retry. Our TL=0 trap handler code will check and
* clear the AFSR after it is done logging what is currently
* in the logout struct and handle this event at that time.
*/
retry
4:
RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
async_err_resetskip:
set cpu_deferred_error, %g1
ba sys_trap
mov PIL_15, %g4 ! run at pil 15
SET_SIZE(async_err)
#endif /* lint */
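/*
* For reference, the systrap argument hand-off set up in async_err above
* amounts to the following (pseudocode sketch only, not literal code):
*
*	arg1 (%g2) = clo_flags, meaningful only when cpu_private is not
*	    yet initialized;
*	arg2 (%g3) = logout busy count;
*	if (busy count != 0)
*		arg2 |= (uint64_t)(TT | TL | CEEN flags) << 32;
*	sys_trap(cpu_deferred_error, arg1, arg2) at PIL 15;
*/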
#if defined(CPU_IMP_L1_CACHE_PARITY)
/*
* D$ parity error trap (trap 71) at TL=0.
* tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
* the various architecture-specific files. This merely sets up the
* arguments for cpu_parity_error and calls it via sys_trap.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
dcache_parity_instr(void)
{}
#else /* lint */
ENTRY_NP(dcache_parity_instr)
membar #Sync ! Cheetah+ requires membar #Sync
set cpu_parity_error, %g1
or %g0, CH_ERR_DPE, %g2
rdpr %tpc, %g3
sethi %hi(sys_trap), %g7
jmp %g7 + %lo(sys_trap)
mov PIL_15, %g4 ! run at pil 15
SET_SIZE(dcache_parity_instr)
#endif /* lint */
/*
* D$ parity error trap (trap 71) at TL>0.
* tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
* the various architecture-specific files. This generates a "Software
* Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
* continue the handling there.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
dcache_parity_tl1_instr(void)
{}
#else /* lint */
ENTRY_NP(dcache_parity_tl1_instr)
CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
SET_SIZE(dcache_parity_tl1_instr)
#endif /* lint */
/*
* Software trap 1 at TL>0.
* tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
* of the various architecture-specific files. This is used as a continuation
* of the dcache parity handling where we've bought an extra TL level, so we
* can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
* and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
* there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
* order two bits from %g1 and %g2 respectively).
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
dcache_parity_tl1_cont_instr(void)
{}
#else /* lint */
ENTRY_NP(dcache_parity_tl1_cont_instr)
CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
SET_SIZE(dcache_parity_tl1_cont_instr)
#endif /* lint */
/*
* D$ parity error at TL>0 handler
* We get here via trap 71 at TL>0->Software trap 1 at TL>0. We enter
* this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
*/
#if defined(lint)
void
dcache_parity_tl1_err(void)
{}
#else /* lint */
ENTRY_NP(dcache_parity_tl1_err)
/*
* This macro saves all the %g registers in the ch_err_tl1_data
* structure, updates the ch_err_tl1_flags and saves the %tpc in
* ch_err_tl1_tpc. At the end of this macro, %g1 will point to
* the ch_err_tl1_data structure and %g2 will have the original
* flags in the ch_err_tl1_data structure. All %g registers
* except for %g1 and %g2 will be available.
*/
CH_ERR_TL1_ENTER(CH_ERR_DPE);
#ifdef TRAPTRACE
/*
* Get current trap trace entry physical pointer.
*/
CPU_INDEX(%g6, %g5)
sll %g6, TRAPTR_SIZE_SHIFT, %g6
set trap_trace_ctl, %g5
add %g6, %g5, %g6
ld [%g6 + TRAPTR_LIMIT], %g5
tst %g5
be %icc, dpe_tl1_skip_tt
nop
ldx [%g6 + TRAPTR_PBASE], %g5
ld [%g6 + TRAPTR_OFFSET], %g4
add %g5, %g4, %g5
/*
* Create trap trace entry.
*/
rd %asi, %g7
wr %g0, TRAPTR_ASI, %asi
rd STICK, %g4
stxa %g4, [%g5 + TRAP_ENT_TICK]%asi
rdpr %tl, %g4
stha %g4, [%g5 + TRAP_ENT_TL]%asi
rdpr %tt, %g4
stha %g4, [%g5 + TRAP_ENT_TT]%asi
rdpr %tpc, %g4
stna %g4, [%g5 + TRAP_ENT_TPC]%asi
rdpr %tstate, %g4
stxa %g4, [%g5 + TRAP_ENT_TSTATE]%asi
stna %sp, [%g5 + TRAP_ENT_SP]%asi
stna %g0, [%g5 + TRAP_ENT_TR]%asi
stna %g0, [%g5 + TRAP_ENT_F1]%asi
stna %g0, [%g5 + TRAP_ENT_F2]%asi
stna %g0, [%g5 + TRAP_ENT_F3]%asi
stna %g0, [%g5 + TRAP_ENT_F4]%asi
wr %g0, %g7, %asi
/*
* Advance trap trace pointer.
*/
ld [%g6 + TRAPTR_OFFSET], %g5
ld [%g6 + TRAPTR_LIMIT], %g4
st %g5, [%g6 + TRAPTR_LAST_OFFSET]
add %g5, TRAP_ENT_SIZE, %g5
sub %g4, TRAP_ENT_SIZE, %g4
cmp %g5, %g4
movge %icc, 0, %g5
st %g5, [%g6 + TRAPTR_OFFSET]
dpe_tl1_skip_tt:
#endif /* TRAPTRACE */
/*
* I$ and D$ are automatically turned off by HW when the CPU hits
* a dcache or icache parity error so we will just leave those two
* off for now to avoid repeating this trap.
* For Panther, however, since we trap on P$ data parity errors
* and HW does not automatically disable P$, we need to disable it
* here so that we don't encounter any recursive traps when we
* issue the retry.
*/
ldxa [%g0]ASI_DCU, %g3
mov 1, %g4
sllx %g4, DCU_PE_SHIFT, %g4
andn %g3, %g4, %g3
stxa %g3, [%g0]ASI_DCU
membar #Sync
/*
* We fall into this macro if we've successfully logged the error in
* the ch_err_tl1_data structure and want the PIL15 softint to pick
* it up and log it. %g1 must point to the ch_err_tl1_data structure.
* Restores the %g registers and issues retry.
*/
CH_ERR_TL1_EXIT;
SET_SIZE(dcache_parity_tl1_err)
#endif /* lint */
/*
* I$ parity error trap (trap 72) at TL=0.
* tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
* the various architecture-specific files. This merely sets up the
* arguments for cpu_parity_error and calls it via sys_trap.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
icache_parity_instr(void)
{}
#else /* lint */
ENTRY_NP(icache_parity_instr)
membar #Sync ! Cheetah+ requires membar #Sync
set cpu_parity_error, %g1
or %g0, CH_ERR_IPE, %g2
rdpr %tpc, %g3
sethi %hi(sys_trap), %g7
jmp %g7 + %lo(sys_trap)
mov PIL_15, %g4 ! run at pil 15
SET_SIZE(icache_parity_instr)
#endif /* lint */
/*
* I$ parity error trap (trap 72) at TL>0.
* tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
* the various architecture-specific files. This generates a "Software
* Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
* continue the handling there.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
icache_parity_tl1_instr(void)
{}
#else /* lint */
ENTRY_NP(icache_parity_tl1_instr)
CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
SET_SIZE(icache_parity_tl1_instr)
#endif /* lint */
/*
* Software trap 2 at TL>0.
* tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
* of the various architecture-specific files. This is used as a continuation
* of the icache parity handling where we've bought an extra TL level, so we
* can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
* and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
* there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
* order two bits from %g1 and %g2 respectively).
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
icache_parity_tl1_cont_instr(void)
{}
#else /* lint */
ENTRY_NP(icache_parity_tl1_cont_instr)
CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
SET_SIZE(icache_parity_tl1_cont_instr)
#endif /* lint */
/*
* I$ parity error at TL>0 handler
* We get here via trap 72 at TL>0->Software trap 2 at TL>0. We enter
* this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
*/
#if defined(lint)
void
icache_parity_tl1_err(void)
{}
#else /* lint */
ENTRY_NP(icache_parity_tl1_err)
/*
* This macro saves all the %g registers in the ch_err_tl1_data
* structure, updates the ch_err_tl1_flags and saves the %tpc in
* ch_err_tl1_tpc. At the end of this macro, %g1 will point to
* the ch_err_tl1_data structure and %g2 will have the original
* flags in the ch_err_tl1_data structure. All %g registers
* except for %g1 and %g2 will be available.
*/
CH_ERR_TL1_ENTER(CH_ERR_IPE);
#ifdef TRAPTRACE
/*
* Get current trap trace entry physical pointer.
*/
CPU_INDEX(%g6, %g5)
sll %g6, TRAPTR_SIZE_SHIFT, %g6
set trap_trace_ctl, %g5
add %g6, %g5, %g6
ld [%g6 + TRAPTR_LIMIT], %g5
tst %g5
be %icc, ipe_tl1_skip_tt
nop
ldx [%g6 + TRAPTR_PBASE], %g5
ld [%g6 + TRAPTR_OFFSET], %g4
add %g5, %g4, %g5
/*
* Create trap trace entry.
*/
rd %asi, %g7
wr %g0, TRAPTR_ASI, %asi
rd STICK, %g4
stxa %g4, [%g5 + TRAP_ENT_TICK]%asi
rdpr %tl, %g4
stha %g4, [%g5 + TRAP_ENT_TL]%asi
rdpr %tt, %g4
stha %g4, [%g5 + TRAP_ENT_TT]%asi
rdpr %tpc, %g4
stna %g4, [%g5 + TRAP_ENT_TPC]%asi
rdpr %tstate, %g4
stxa %g4, [%g5 + TRAP_ENT_TSTATE]%asi
stna %sp, [%g5 + TRAP_ENT_SP]%asi
stna %g0, [%g5 + TRAP_ENT_TR]%asi
stna %g0, [%g5 + TRAP_ENT_F1]%asi
stna %g0, [%g5 + TRAP_ENT_F2]%asi
stna %g0, [%g5 + TRAP_ENT_F3]%asi
stna %g0, [%g5 + TRAP_ENT_F4]%asi
wr %g0, %g7, %asi
/*
* Advance trap trace pointer.
*/
ld [%g6 + TRAPTR_OFFSET], %g5
ld [%g6 + TRAPTR_LIMIT], %g4
st %g5, [%g6 + TRAPTR_LAST_OFFSET]
add %g5, TRAP_ENT_SIZE, %g5
sub %g4, TRAP_ENT_SIZE, %g4
cmp %g5, %g4
movge %icc, 0, %g5
st %g5, [%g6 + TRAPTR_OFFSET]
ipe_tl1_skip_tt:
#endif /* TRAPTRACE */
/*
* We fall into this macro if we've successfully logged the error in
* the ch_err_tl1_data structure and want the PIL15 softint to pick
* it up and log it. %g1 must point to the ch_err_tl1_data structure.
* Restores the %g registers and issues retry.
*/
CH_ERR_TL1_EXIT;
SET_SIZE(icache_parity_tl1_err)
#endif /* lint */
#endif /* CPU_IMP_L1_CACHE_PARITY */
/*
* The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
* tte, the virtual address, and the ctxnum of the specified tlb entry. They
* should only be used in places where you have no choice but to look at the
* tlb itself.
*
* Note: These two routines are required by the Estar "cpr" loadable module.
*/
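/*
 * Illustrative (hypothetical, not compiled) use, dumping the first few
 * ITLB entries; the entry count, validity test and printing below are
 * assumptions made for the example only:
 *
 *	tte_t tte;
 *	uint64_t va_tag;
 *	uint_t i;
 *
 *	for (i = 0; i < 16; i++) {
 *		itlb_rd_entry(i, &tte, &va_tag);
 *		if (TTE_IS_VALID(&tte))
 *			prom_printf("itlb[%u] tag %lx\n", i, va_tag);
 *	}
 */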
#if defined(lint)
/* ARGSUSED */
void
itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{}
#else /* lint */
ENTRY_NP(itlb_rd_entry)
sllx %o0, 3, %o0
ldxa [%o0]ASI_ITLB_ACCESS, %g1
stx %g1, [%o1]
ldxa [%o0]ASI_ITLB_TAGREAD, %g2
set TAGREAD_CTX_MASK, %o4
andn %g2, %o4, %o5
retl
stx %o5, [%o2]
SET_SIZE(itlb_rd_entry)
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{}
#else /* lint */
ENTRY_NP(dtlb_rd_entry)
sllx %o0, 3, %o0
ldxa [%o0]ASI_DTLB_ACCESS, %g1
stx %g1, [%o1]
ldxa [%o0]ASI_DTLB_TAGREAD, %g2
set TAGREAD_CTX_MASK, %o4
andn %g2, %o4, %o5
retl
stx %o5, [%o2]
SET_SIZE(dtlb_rd_entry)
#endif /* lint */
#if !(defined(JALAPENO) || defined(SERRANO))
#if defined(lint)
uint64_t
get_safari_config(void)
{ return (0); }
#else /* lint */
ENTRY(get_safari_config)
ldxa [%g0]ASI_SAFARI_CONFIG, %o0
retl
nop
SET_SIZE(get_safari_config)
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
set_safari_config(uint64_t safari_config)
{}
#else /* lint */
ENTRY(set_safari_config)
stxa %o0, [%g0]ASI_SAFARI_CONFIG
membar #Sync
retl
nop
SET_SIZE(set_safari_config)
#endif /* lint */
#endif /* !(JALAPENO || SERRANO) */
#if defined(lint)
void
cpu_clearticknpt(void)
{}
#else /* lint */
/*
* Clear the NPT (non-privileged trap) bit in the %tick/%stick
* registers. In an effort to make the change in the
* tick/stick counter as consistent as possible, we disable
* all interrupts while we're changing the registers. We also
* ensure that the read and write instructions are in the same
* line in the instruction cache.
*/
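/*
 * Note that this routine does not return with retl; it ends with
 * "jmp %g4 + 4", so the caller is expected to have placed its return
 * address in %g4 before branching here.
 */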
ENTRY_NP(cpu_clearticknpt)
rdpr %pstate, %g1 /* save processor state */
andn %g1, PSTATE_IE, %g3 /* turn off */
wrpr %g0, %g3, %pstate /* interrupts */
rdpr %tick, %g2 /* get tick register */
brgez,pn %g2, 1f /* if NPT bit off, we're done */
mov 1, %g3 /* create mask */
sllx %g3, 63, %g3 /* for NPT bit */
ba,a,pt %xcc, 2f
.align 8 /* Ensure rd/wr in same i$ line */
2:
rdpr %tick, %g2 /* get tick register */
wrpr %g3, %g2, %tick /* write tick register, */
/* clearing NPT bit */
1:
rd STICK, %g2 /* get stick register */
brgez,pn %g2, 3f /* if NPT bit off, we're done */
mov 1, %g3 /* create mask */
sllx %g3, 63, %g3 /* for NPT bit */
ba,a,pt %xcc, 4f
.align 8 /* Ensure rd/wr in same i$ line */
4:
rd STICK, %g2 /* get stick register */
wr %g3, %g2, STICK /* write stick register, */
/* clearing NPT bit */
3:
jmp %g4 + 4
wrpr %g0, %g1, %pstate /* restore processor state */
SET_SIZE(cpu_clearticknpt)
#endif /* lint */
#if defined(CPU_IMP_L1_CACHE_PARITY)
#if defined(lint)
/*
* correct_dcache_parity(size_t size, size_t linesize)
*
* Correct D$ data parity by zeroing the data and initializing microtag
* for all indexes and all ways of the D$.
*
*/
/* ARGSUSED */
void
correct_dcache_parity(size_t size, size_t linesize)
{}
#else /* lint */
ENTRY(correct_dcache_parity)
/*
* Register Usage:
*
* %o0 = input D$ size
* %o1 = input D$ line size
* %o2 = scratch
* %o3 = scratch
* %o4 = scratch
*/
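/*
 * The loop below walks the line addresses from (size - linesize) down to
 * zero.  For each line we write the utag (the way number, i.e. the line
 * address divided by the 16KB way size, hence the "srl %o0, 14" below)
 * and then zero the line's data 8 bytes at a time; on Panther each store
 * is repeated at the DC_data_parity address so the parity bits are
 * cleared as well.
 */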
sub %o0, %o1, %o0 ! init cache line address
/*
* For Panther CPUs, we also need to clear the data parity bits
* using DC_data_parity bit of the ASI_DCACHE_DATA register.
*/
GET_CPU_IMPL(%o3)
cmp %o3, PANTHER_IMPL
bne 1f
clr %o3 ! zero for non-Panther
mov 1, %o3
sll %o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3
1:
/*
* Set utag = way since it must be unique within an index.
*/
srl %o0, 14, %o2 ! get cache way (DC_way)
membar #Sync ! required before ASI_DC_UTAG
stxa %o2, [%o0]ASI_DC_UTAG ! set D$ utag = cache way
membar #Sync ! required after ASI_DC_UTAG
/*
* Zero line of D$ data (and data parity bits for Panther)
*/
sub %o1, 8, %o2
or %o0, %o3, %o4 ! same address + DC_data_parity
2:
membar #Sync ! required before ASI_DC_DATA
stxa %g0, [%o0 + %o2]ASI_DC_DATA ! zero 8 bytes of D$ data
membar #Sync ! required after ASI_DC_DATA
/*
* We also clear the parity bits if this is a Panther.  For non-Panther
* CPUs, %o4 is the same address as %o0 and we simply end up clearing the
* same D$ data twice.
*/
stxa %g0, [%o4 + %o2]ASI_DC_DATA
membar #Sync
subcc %o2, 8, %o2
bge 2b
nop
subcc %o0, %o1, %o0
bge 1b
nop
retl
nop
SET_SIZE(correct_dcache_parity)
#endif /* lint */
#endif /* CPU_IMP_L1_CACHE_PARITY */
#if defined(lint)
/*
* Get timestamp (stick).
*/
/* ARGSUSED */
void
stick_timestamp(int64_t *ts)
{
}
#else /* lint */
ENTRY_NP(stick_timestamp)
rd STICK, %g1 ! read stick reg
sllx %g1, 1, %g1
srlx %g1, 1, %g1 ! clear npt bit
retl
stx %g1, [%o0] ! store the timestamp
SET_SIZE(stick_timestamp)
#endif /* lint */
#if defined(lint)
/*
* Set STICK adjusted by skew.
*/
/* ARGSUSED */
void
stick_adj(int64_t skew)
{
}
#else /* lint */
ENTRY_NP(stick_adj)
rdpr %pstate, %g1 ! save processor state
andn %g1, PSTATE_IE, %g3
ba 1f ! cache align stick adj
wrpr %g0, %g3, %pstate ! turn off interrupts
.align 16
1: nop
rd STICK, %g4 ! read stick reg
add %g4, %o0, %o1 ! adjust stick with skew
wr %o1, %g0, STICK ! write stick reg
retl
wrpr %g1, %pstate ! restore processor state
SET_SIZE(stick_adj)
#endif /* lint */
#if defined(lint)
/*
* Debugger-specific stick retrieval
*/
/*ARGSUSED*/
int
kdi_get_stick(uint64_t *stickp)
{
return (0);
}
#else /* lint */
ENTRY_NP(kdi_get_stick)
rd STICK, %g1
stx %g1, [%o0]
retl
mov %g0, %o0
SET_SIZE(kdi_get_stick)
#endif /* lint */
#if defined(lint)
/*
* Invalidate the specified line from the D$.
*
* Register usage:
* %o0 - index for the invalidation, specifies DC_way and DC_addr
*
* ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
* stored to a particular DC_way and DC_addr in ASI_DC_TAG.
*
* The format of the stored 64-bit value is:
*
* +----------+--------+----------+
* | Reserved | DC_tag | DC_valid |
* +----------+--------+----------+
* 63 31 30 1 0
*
* DC_tag is the 30-bit physical tag of the associated line.
* DC_valid is the 1-bit valid field for both the physical and snoop tags.
*
* The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
*
* +----------+--------+----------+----------+
* | Reserved | DC_way | DC_addr | Reserved |
* +----------+--------+----------+----------+
* 63 16 15 14 13 5 4 0
*
* DC_way is a 2-bit index that selects one of the 4 ways.
* DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
*
* Setting the DC_valid bit to zero for the specified DC_way and
* DC_addr index into the D$ results in an invalidation of a D$ line.
*/
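/*
 * In other words, the caller's index is (DC_way << 9) | DC_addr, and the
 * "sll %o0, 5" in the code below moves it into the bit positions shown
 * above.  For example, index ((2 << 9) | 3) == 0x403 becomes 0x8060
 * after the shift, i.e. DC_way = 2, DC_addr = 3.
 */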
/*ARGSUSED*/
void
dcache_inval_line(int index)
{
}
#else /* lint */
ENTRY(dcache_inval_line)
sll %o0, 5, %o0 ! shift index into DC_way and DC_addr
stxa %g0, [%o0]ASI_DC_TAG ! zero the DC_valid and DC_tag bits
membar #Sync
retl
nop
SET_SIZE(dcache_inval_line)
#endif /* lint */
#if defined(lint)
/*
* Invalidate the entire I$
*
* Register usage:
* %o0 - specifies IC_way, IC_addr, IC_tag
* %o1 - scratch
* %o2 - used to save and restore DCU value
* %o3 - scratch
* %o5 - used to save and restore PSTATE
*
* Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
* the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
* block out snoops and invalidates to the I$, causing I$ consistency
* to be broken. Before turning on the I$, all I$ lines must be invalidated.
*
* ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
* stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
* info below describes store (write) use of ASI_IC_TAG. Note that read
* use of ASI_IC_TAG behaves differently.
*
* The format of the stored 64-bit value is:
*
* +----------+--------+---------------+-----------+
* | Reserved | Valid | IC_vpred<7:0> | Undefined |
* +----------+--------+---------------+-----------+
* 63 55 54 53 46 45 0
*
* Valid is the 1-bit valid field for both the physical and snoop tags.
* IC_vpred is the 8-bit LPB bits for 8 instructions starting at
* the 32-byte boundary aligned address specified by IC_addr.
*
* The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
*
* +----------+--------+---------+--------+---------+
* | Reserved | IC_way | IC_addr | IC_tag |Reserved |
* +----------+--------+---------+--------+---------+
* 63 16 15 14 13 5 4 3 2 0
*
* IC_way is a 2-bit index that selects one of the 4 ways.
* IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
* IC_addr[5] is a "don't care" for a store.
* IC_tag set to 2 specifies that the stored value is to be interpreted
* as containing Valid and IC_vpred as described above.
*
* Setting the Valid bit to zero for the specified IC_way and
* IC_addr index into the I$ results in an invalidation of an I$ line.
*/
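/*
 * For example, invalidating a single line by hand would amount to storing
 * zero to ASI_IC_TAG at VA = (IC_way << 14) | (index << 6) | (2 << 3),
 * where (2 << 3) selects the IC_tag = 2 interpretation described above;
 * the CH_ICACHE_FLUSHALL macro used below effectively does this for every
 * way and every IC_addr index.
 */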
/*ARGSUSED*/
void
icache_inval_all(void)
{
}
#else /* lint */
ENTRY(icache_inval_all)
rdpr %pstate, %o5
andn %o5, PSTATE_IE, %o3
wrpr %g0, %o3, %pstate ! clear IE bit
GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
ld [%o0 + CHPR_ICACHE_LINESIZE], %o1
ba,pt %icc, 2f
ld [%o0 + CHPR_ICACHE_SIZE], %o0
icache_inval_all_1:
ASM_LD(%o0, icache_size)
ASM_LD(%o1, icache_linesize)
2:
CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
retl
wrpr %g0, %o5, %pstate ! restore earlier pstate
SET_SIZE(icache_inval_all)
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
cache_scrubreq_tl1(uint64_t inum, uint64_t index)
{
}
#else /* lint */
/*
* cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
* crosstrap. It atomically increments the outstanding request counter and,
* if there was not already an outstanding request, branches to setsoftint_tl1
* to enqueue an intr_vec for the given inum.
*/
! Register usage:
!
! Arguments:
! %g1 - inum
! %g2 - index into chsm_outstanding array
!
! Internal:
! %g2, %g3, %g5 - scratch
! %g4 - ptr. to scrub_misc chsm_outstanding[index].
! %g6 - setsoftint_tl1 address
ENTRY_NP(cache_scrubreq_tl1)
mulx %g2, CHSM_OUTSTANDING_INCR, %g2
set CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
add %g2, %g3, %g2
GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
ld [%g4], %g2 ! cpu's chsm_outstanding[index]
!
! no need to use atomic instructions for the following
! increment - we're at tl1
!
add %g2, 0x1, %g3
brnz,pn %g2, 1f ! no need to enqueue more intr_vec
st %g3, [%g4] ! delay - store incremented counter
ASM_JMP(%g6, setsoftint_tl1)
! not reached
1:
retry
SET_SIZE(cache_scrubreq_tl1)
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
get_cpu_error_state(ch_cpu_errors_t *cpu_error_regs)
{}
#else /* lint */
/*
* Get the error state for the processor.
* Note that this must not be used at TL>0
*/
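/*
 * Illustrative (not compiled) C-level use; the field names are assumed
 * to correspond to the CH_CPU_ERRORS_* offsets used below:
 *
 *	ch_cpu_errors_t cpu_error_regs;
 *
 *	get_cpu_error_state(&cpu_error_regs);
 *	if (cpu_error_regs.afsr != 0)
 *		... examine afar, shadow and afsr_ext values ...
 */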
ENTRY(get_cpu_error_state)
#if defined(CHEETAH_PLUS)
set ASI_SHADOW_REG_VA, %o2
ldxa [%o2]ASI_AFSR, %o1 ! shadow afsr reg
stx %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
ldxa [%o2]ASI_AFAR, %o1 ! shadow afar reg
stx %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
GET_CPU_IMPL(%o3) ! Only panther has AFSR_EXT registers
cmp %o3, PANTHER_IMPL
bne,a 1f
stx %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT] ! zero for non-PN
set ASI_AFSR_EXT_VA, %o2
ldxa [%o2]ASI_AFSR, %o1 ! afsr_ext reg
stx %o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
set ASI_SHADOW_AFSR_EXT_VA, %o2
ldxa [%o2]ASI_AFSR, %o1 ! shadow afsr_ext reg
stx %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
b 2f
nop
1:
stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] ! zero for non-PN
2:
#else /* CHEETAH_PLUS */
stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
stx %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
#endif /* CHEETAH_PLUS */
#if defined(SERRANO)
/*
* Serrano has an afar2 which captures the address on FRC/FRU errors.
* We save this in the afar2 of the register save area.
*/
set ASI_MCU_AFAR2_VA, %o2
ldxa [%o2]ASI_MCU_CTRL, %o1
stx %o1, [%o0 + CH_CPU_ERRORS_AFAR2]
#endif /* SERRANO */
ldxa [%g0]ASI_AFSR, %o1 ! primary afsr reg
stx %o1, [%o0 + CH_CPU_ERRORS_AFSR]
ldxa [%g0]ASI_AFAR, %o1 ! primary afar reg
retl
stx %o1, [%o0 + CH_CPU_ERRORS_AFAR]
SET_SIZE(get_cpu_error_state)
#endif /* lint */
#if defined(lint)
/*
* Check a page of memory for errors.
*
* Load each 64 byte block from physical memory.
* Check AFSR after each load to see if an error
* was caused. If so, log/scrub that error.
*
* Used to determine if a page contains
* CEs when CEEN is disabled.
*/
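/*
 * In C terms the loop below is roughly the following; load_block() and
 * read_afsr() are illustrative stand-ins for the ASI_BLK_P block load
 * and the "ldxa [%g0]ASI_AFSR" read:
 *
 *	for (n = psz / 64; n != 0; n--, va += 64) {
 *		load_block(va);
 *		if (read_afsr() != 0) {
 *			get_cpu_error_state(&cpu_error_regs);
 *			cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
 *		}
 *	}
 */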
/*ARGSUSED*/
void
cpu_check_block(caddr_t va, uint_t psz)
{}
#else /* lint */
ENTRY(cpu_check_block)
!
! get a new window with room for the error regs
!
save %sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
srl %i1, 6, %l4 ! clear top bits of psz
! and divide by 64
rd %fprs, %l2 ! store FP
wr %g0, FPRS_FEF, %fprs ! enable FP
1:
ldda [%i0]ASI_BLK_P, %d0 ! load a block
membar #Sync
ldxa [%g0]ASI_AFSR, %l3 ! read afsr reg
brz,a,pt %l3, 2f ! check for error
nop
!
! if error, read the error regs and log it
!
call get_cpu_error_state
add %fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0
!
! cpu_ce_detected(ch_cpu_errors_t *, flag)
!
call cpu_ce_detected ! log the error
mov CE_CEEN_TIMEOUT, %o1
2:
dec %l4 ! next 64-byte block
brnz,a,pt %l4, 1b
add %i0, 64, %i0 ! increment block addr
wr %l2, %g0, %fprs ! restore FP
ret
restore
SET_SIZE(cpu_check_block)
#endif /* lint */
#if defined(lint)
/*
* Perform a cpu logout called from C. This is used where we did not trap
* for the error but still want to gather "what we can". Caller must make
* sure cpu private area exists and that the indicated logout area is free
* for use, and that the calling thread cannot migrate between cpus.
*/
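/*
 * Illustrative (hypothetical, not compiled) call sequence; clop would
 * normally point into this cpu's private logout area:
 *
 *	kpreempt_disable();		(satisfies the no-migration rule)
 *	cpu_delayed_logout(afar, clop);
 *	kpreempt_enable();
 */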
/*ARGSUSED*/
void
cpu_delayed_logout(uint64_t afar, ch_cpu_logout_t *clop)
{ }
#else
ENTRY(cpu_delayed_logout)
rdpr %pstate, %o2
andn %o2, PSTATE_IE, %o2
wrpr %g0, %o2, %pstate ! disable interrupts
PARK_SIBLING_CORE(%o2, %o3, %o4) ! %o2 has DCU value
add %o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
rd %asi, %g1
wr %g0, ASI_P, %asi
GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
wr %g1, %asi
UNPARK_SIBLING_CORE(%o2, %o3, %o4) ! can use %o2 again
rdpr %pstate, %o2
or %o2, PSTATE_IE, %o2
wrpr %g0, %o2, %pstate
retl
nop
SET_SIZE(cpu_delayed_logout)
#endif /* lint */
#if defined(lint)
/*ARGSUSED*/
int
dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
{ return (0); }
#else
ENTRY(dtrace_blksuword32)
save %sp, -SA(MINFRAME + 4), %sp
rdpr %pstate, %l1
andn %l1, PSTATE_IE, %l2 ! disable interrupts to
wrpr %g0, %l2, %pstate ! protect our FPU diddling
rd %fprs, %l0
andcc %l0, FPRS_FEF, %g0
bz,a,pt %xcc, 1f ! if the fpu is disabled
wr %g0, FPRS_FEF, %fprs ! ... enable the fpu
st %f0, [%fp + STACK_BIAS - 4] ! save %f0 to the stack
1:
set 0f, %l5
/*
* We're about to write a block full of either total garbage
* (not kernel data, don't worry) or user floating-point data
* (so it only _looks_ like garbage).
*/
ld [%i1], %f0 ! modify the block
membar #Sync
stn %l5, [THREAD_REG + T_LOFAULT] ! set up the lofault handler
stda %d0, [%i0]ASI_BLK_COMMIT_S ! store the modified block
membar #Sync
stn %g0, [THREAD_REG + T_LOFAULT] ! remove the lofault handler
bz,a,pt %xcc, 1f
wr %g0, %l0, %fprs ! restore %fprs
ld [%fp + STACK_BIAS - 4], %f0 ! restore %f0
1:
wrpr %g0, %l1, %pstate ! restore interrupts
ret
restore %g0, %g0, %o0
0:
membar #Sync
stn %g0, [THREAD_REG + T_LOFAULT] ! remove the lofault handler
bz,a,pt %xcc, 1f
wr %g0, %l0, %fprs ! restore %fprs
ld [%fp + STACK_BIAS - 4], %f0 ! restore %f0
1:
wrpr %g0, %l1, %pstate ! restore interrupts
/*
* If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
* which deals with watchpoints. Otherwise, just return -1.
*/
brnz,pt %i2, 1f
nop
ret
restore %g0, -1, %o0
1:
call dtrace_blksuword32_err
restore
SET_SIZE(dtrace_blksuword32)
#endif /* lint */
#ifdef CHEETAHPLUS_ERRATUM_25
#if defined(lint)
/*
* Claim a chunk of physical address space.
*/
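/*
 * The loop below walks the range [pa, pa + sz) from the top down at the
 * given stride, doing "casxa [line]ASI_MEM, %g0, %g0" on each line.
 * Comparing the old contents with zero and, at most, writing zero back
 * over a zero leaves memory unchanged, but the atomic access is what
 * "claims" the line for this cpu (see CHEETAHPLUS_ERRATUM_25 above).
 */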
/*ARGSUSED*/
void
claimlines(uint64_t pa, size_t sz, int stride)
{}
#else /* lint */
ENTRY(claimlines)
1:
subcc %o1, %o2, %o1
add %o0, %o1, %o3
bgeu,a,pt %xcc, 1b
casxa [%o3]ASI_MEM, %g0, %g0
membar #Sync
retl
nop
SET_SIZE(claimlines)
#endif /* lint */
#if defined(lint)
/*
* CPU feature initialization,
* turn BPE off,
* get device id.
*/
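/*
 * If the cheetah_bpe_off tunable is non-zero, the code below clears the
 * ASR_DISPATCH_CONTROL_BPE (branch prediction enable) bit in the dispatch
 * control register; it then reads ASI_DEVICE_SERIAL_ID and records the
 * device id in this cpu's cpunodes entry.
 */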
/*ARGSUSED*/
void
cpu_feature_init(void)
{}
#else /* lint */
ENTRY(cpu_feature_init)
save %sp, -SA(MINFRAME), %sp
sethi %hi(cheetah_bpe_off), %o0
ld [%o0 + %lo(cheetah_bpe_off)], %o0
brz %o0, 1f
nop
rd ASR_DISPATCH_CONTROL, %o0
andn %o0, ASR_DISPATCH_CONTROL_BPE, %o0
wr %o0, 0, ASR_DISPATCH_CONTROL
1:
!
! get the device_id and store it
! in the appropriate cpunodes structure
! given the cpu's index
!
CPU_INDEX(%o0, %o1)
mulx %o0, CPU_NODE_SIZE, %o0
set cpunodes + DEVICE_ID, %o1
ldxa [%g0] ASI_DEVICE_SERIAL_ID, %o2
stx %o2, [%o0 + %o1]
#ifdef CHEETAHPLUS_ERRATUM_34
!
! apply Cheetah+ erratum 34 workaround
!
call itlb_erratum34_fixup
nop
call dtlb_erratum34_fixup
nop
#endif /* CHEETAHPLUS_ERRATUM_34 */
ret
restore
SET_SIZE(cpu_feature_init)
#endif /* lint */
#if defined(lint)
/*
* Copy a tsb entry atomically, from src to dest.
* src must be 128 bit aligned.
*/
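/*
 * The atomicity comes from the single 128-bit ldda through ASI_NQUAD_LD
 * below, which reads the entry's tag and data in one access; the
 * destination is then written with two ordinary 64-bit stores.
 */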
/*ARGSUSED*/
void
copy_tsb_entry(uintptr_t src, uintptr_t dest)
{}
#else /* lint */
ENTRY(copy_tsb_entry)
ldda [%o0]ASI_NQUAD_LD, %o2 ! %o2 = tag, %o3 = data
stx %o2, [%o1]
stx %o3, [%o1 + 8 ]
retl
nop
SET_SIZE(copy_tsb_entry)
#endif /* lint */
#endif /* CHEETAHPLUS_ERRATUM_25 */
#ifdef CHEETAHPLUS_ERRATUM_34
#if defined(lint)
/*ARGSUSED*/
void
itlb_erratum34_fixup(void)
{}
#else /* lint */
!
! In Cheetah+ erratum 34, under certain conditions an ITLB locked
! index 0 TTE will erroneously be displaced when a new TTE is
! loaded via ASI_ITLB_IN. In order to avoid cheetah+ erratum 34,
! locked index 0 TTEs must be relocated.
!
! NOTE: Care must be taken to avoid an ITLB miss in this routine.
!
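! The approach taken below: read the index 0 TTE and its tag; if the TTE
! is valid and locked, demap it, search upward from index 1 for an
! unlocked or invalid slot, and re-insert the saved TTE at that index via
! MMU_TAG_ACCESS and ASI_ITLB_ACCESS.
!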
ENTRY_NP(itlb_erratum34_fixup)
rdpr %pstate, %o3
#ifdef DEBUG
PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
#endif /* DEBUG */
wrpr %o3, PSTATE_IE, %pstate ! Disable interrupts
ldxa [%g0]ASI_ITLB_ACCESS, %o1 ! %o1 = entry 0 data
ldxa [%g0]ASI_ITLB_TAGREAD, %o2 ! %o2 = entry 0 tag
cmp %o1, %g0 ! Is this entry valid?
bge %xcc, 1f
andcc %o1, TTE_LCK_INT, %g0 ! Is this entry locked?
bnz %icc, 2f
nop
1:
retl ! Nope, outta here...
wrpr %g0, %o3, %pstate ! Enable interrupts
2:
sethi %hi(FLUSH_ADDR), %o4
stxa %g0, [%o2]ASI_ITLB_DEMAP ! Flush this mapping
flush %o4 ! Flush required for I-MMU
!
! Start search from index 1 up. This is because the kernel force
! loads its text page at index 15 in sfmmu_kernel_remap() and we
! don't want our relocated entry evicted later.
!
! NOTE: We assume that we'll be successful in finding an unlocked
! or invalid entry.  If that isn't the case, there are bound to be
! bigger problems.
!
set (1 << 3), %g3
3:
ldxa [%g3]ASI_ITLB_ACCESS, %o4 ! Load TTE from t16
!
! If this entry isn't valid, we'll choose to displace it (regardless
! of the lock bit).
!
cmp %o4, %g0 ! TTE is >= 0 iff not valid
bge %xcc, 4f ! If invalid, go displace
andcc %o4, TTE_LCK_INT, %g0 ! Check for lock bit
bnz,a %icc, 3b ! If locked, look at next
add %g3, (1 << 3), %g3 ! entry
4:
!
! We found an unlocked or invalid entry; we'll explicitly load
! the former index 0 entry here.
!
sethi %hi(FLUSH_ADDR), %o4
set MMU_TAG_ACCESS, %g4
stxa %o2, [%g4]ASI_IMMU
stxa %o1, [%g3]ASI_ITLB_ACCESS
flush %o4 ! Flush required for I-MMU
retl
wrpr %g0, %o3, %pstate ! Enable interrupts
SET_SIZE(itlb_erratum34_fixup)
#endif /* lint */
#if defined(lint)
/*ARGSUSED*/
void
dtlb_erratum34_fixup(void)
{}
#else /* lint */
!
! In Cheetah+ erratum 34, under certain conditions a DTLB locked
! index 0 TTE will erroneously be displaced when a new TTE is
! loaded. In order to avoid cheetah+ erratum 34, locked index 0
! TTEs must be relocated.
!
ENTRY_NP(dtlb_erratum34_fixup)
rdpr %pstate, %o3
#ifdef DEBUG
PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label2, %g1)
#endif /* DEBUG */
wrpr %o3, PSTATE_IE, %pstate ! Disable interrupts
ldxa [%g0]ASI_DTLB_ACCESS, %o1 ! %o1 = entry 0 data
ldxa [%g0]ASI_DTLB_TAGREAD, %o2 ! %o2 = entry 0 tag
cmp %o1, %g0 ! Is this entry valid?
bge %xcc, 1f
andcc %o1, TTE_LCK_INT, %g0 ! Is this entry locked?
bnz %icc, 2f
nop
1:
retl ! Nope, outta here...
wrpr %g0, %o3, %pstate ! Enable interrupts
2:
stxa %g0, [%o2]ASI_DTLB_DEMAP ! Flush this mapping
membar #Sync
!
! Start search from index 1 up.
!
! NOTE: We assume that we'll be successful in finding an unlocked
! or invalid entry.  If that isn't the case, there are bound to be
! bigger problems.
!
set (1 << 3), %g3
3:
ldxa [%g3]ASI_DTLB_ACCESS, %o4 ! Load TTE from t16
!
! If this entry isn't valid, we'll choose to displace it (regardless
! of the lock bit).
!
cmp %o4, %g0 ! TTE is >= 0 iff not valid
bge %xcc, 4f ! If invalid, go displace
andcc %o4, TTE_LCK_INT, %g0 ! Check for lock bit
bnz,a %icc, 3b ! If locked, look at next
add %g3, (1 << 3), %g3 ! entry
4:
!
! We found an unlocked or invalid entry; we'll explicitly load
! the former index 0 entry here.
!
set MMU_TAG_ACCESS, %g4
stxa %o2, [%g4]ASI_DMMU
stxa %o1, [%g3]ASI_DTLB_ACCESS
membar #Sync
retl
wrpr %g0, %o3, %pstate ! Enable interrupts
SET_SIZE(dtlb_erratum34_fixup)
#endif /* lint */
#endif /* CHEETAHPLUS_ERRATUM_34 */