us3_common_asm.s revision 19f938d5a97467cae5809ba3cee189b02c8e03b7
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#if !defined(lint)
#include "assym.h"
#endif /* !lint */
#include <sys/asm_linkage.h>
#include <vm/hat_sfmmu.h>
#include <sys/machparam.h>
#include <sys/machcpuvar.h>
#include <sys/machthread.h>
#include <sys/machtrap.h>
#include <sys/privregs.h>
#include <sys/cheetahregs.h>
#include <sys/us3_module.h>
#include <sys/cheetahasm.h>
#ifdef TRAPTRACE
#include <sys/traptrace.h>
#endif /* TRAPTRACE */
#if !defined(lint)
/* BEGIN CSTYLED */
nop ;\
nop ;\
/* \
* flushtype = FLUSHALL_TYPE, flush the whole thing \
* tmp3 = cache size \
* tmp1 = cache line size \
*/ \
4: \
nop ;\
/* \
* flushtype = FLUSHPAGE_TYPE \
* arg1 = pfn \
* arg2 = virtual color \
* tmp1 = cache line size \
* tmp2 = tag from cache \
* tmp3 = counter \
*/ \
2: \
4: \
5: \
nop ;\
/* \
* flushtype = FLUSHMATCH_TYPE \
* arg1 = tag to compare against \
* tmp1 = cache line size \
* tmp3 = cache size \
* arg2 = counter \
* tmp2 = cache tag \
*/ \
3: \
4: \
nop ;\
5: \
1:
/* END CSTYLED */
#endif /* !lint */
/*
* Cheetah MMU and Cache operations.
*/
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* flush page from the tlb
*
* %o0 = vaddr
* %o1 = sfmmup
*/
#ifdef DEBUG
#endif /* DEBUG */
/*
* disable ints
*/
/*
* Then, blow out the tlb
* Interrupts are disabled to prevent the primary ctx register
* from changing underneath us.
*/
/*
* For Kernel demaps use primary. type = page implicitly
*/
1:
/*
* User demap. We need to set the primary context properly.
* Secondary context cannot be used for Cheetah IMMU.
* %o0 = vaddr
* %o1 = sfmmup
* %o3 = FLUSH_ADDR
*/
#endif /* lint */
#if defined(lint)
void
vtag_flushall(void)
{}
#else /* lint */
/*
* flush the tlb
*/
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* x-trap to flush page from tlb and tsb
*
* %g1 = vaddr, zero-extended on 32-bit kernel
* %g2 = sfmmup
*
* assumes TSBE_TAG = 0
*/
/* We need to demap in the kernel context */
1:
/* We need to demap in a user context */
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
*
* %g1 = vaddr, zero-extended on 32-bit kernel
* %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
*
* NOTE: this handler relies on the fact that no
* interrupts or traps can occur during the loop
* issuing the TLB_DEMAP operations. It is assumed
* that interrupts are disabled and this code is
* fetching from the kernel locked text address.
*
* assumes TSBE_TAG = 0
*/
/* We need to demap in the kernel context */
4:
1:
/*
* We need to demap in a user context
*
* g2 = sfmmup
* g3 = pgcnt
*/
3:
#endif /* lint */
#if defined(lint)
/*ARGSUSED*/
void
{}
#else /* lint */
/*
* x-trap to flush tlb
*/
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* vac_flushpage(pfnum, color)
* Flush 1 8k page of the D-$ with physical page = pfnum
* Algorithm:
* The cheetah dcache is a 64k pseudo 4-way set-associative cache.
* It is virtual indexed, physically tagged cache.
*/
.seg ".data"
.align 8
/*
* flush page from the d$
*
* %o0 = pfnum, %o1 = color
*/
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* x-trap to flush page from the d$
*
* %g1 = pfnum, %g2 = color
*/
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* In UltraSPARC III flushcolor is the same as flushpage.
* This is because we have an ASI to flush dcache using physical
* address.
* Flushing dcache using physical address is faster because we
* don't have to deal with associativity of dcache.
* The arguments to vac_flushpage() and vac_flushcolor() are the same but
* the order is reversed. This is because we maintain compatibility
* with spitfire, in which vac_flushcolor has only one argument, namely
* vcolor.
*/
/*
* %o0 = vcolor, %o1 = pfnum
*/
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* %g1 = vcolor
* %g2 = pfnum
*/
#endif /* lint */
#if defined(lint)
int
idsr_busy(void)
{
return (0);
}
#else /* lint */
/*
* Determine whether or not the IDSR is busy.
* Entry: no arguments
* Returns: 1 if busy, 0 otherwise
*/
1:
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
/* ARGSUSED */
void
{}
#else /* lint */
.asciz "ASI_INTR_DISPATCH_STATUS error: busy"
.align 4
/*
* Setup interrupt dispatch data registers
* Entry:
* %o0 - function or inumber to call
* %o1, %o2 - arguments (2 uint64_t's)
*/
.seg "text"
#ifdef DEBUG
!
!
#endif /* DEBUG */
!
!
1:
!
!
!
!
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{ return; }
#else /* lint */
/*
*/
#endif /* lint */
#endif /* !(JALAPENO || SERRANO) */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* flush_instr_mem:
* Flush 1 page of the I-$ starting at vaddr
* %o0 vaddr
* %o1 bytes to be flushed
* UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
* the stores from all processors so that a FLUSH instruction is only needed
* to ensure pipeline is consistent. This means a single flush is sufficient at
* the end of a sequence of stores that updates the instruction stream to
* ensure correct operation.
*/
#endif /* lint */
#if defined(CPU_IMP_ECACHE_ASSOC)
#if defined(lint)
/* ARGSUSED */
get_ecache_ctrl(void)
{ return (0); }
#else /* lint */
!
!
bne 1f
b 2f
1:
2:
#endif /* lint */
#endif /* CPU_IMP_ECACHE_ASSOC */
/*
* flush_ecache:
* %o0 - 64 bit physical address
* %o1 - ecache size
* %o2 - ecache linesize
*/
#if defined(lint)
/*ARGSUSED*/
void
{}
#else /* !lint */
/*
* For certain CPU implementations, we have to flush the L2 cache
* before flushing the ecache.
*/
/*
* Flush the entire Ecache using displacement flush.
*/
#endif /* lint */
#endif /* !(JALAPENO || SERRANO) */
#if defined(lint)
void
flush_dcache(void)
{}
#else /* lint */
#endif /* lint */
#if defined(lint)
void
flush_icache(void)
{}
#else /* lint */
2:
#endif /* lint */
#if defined(lint)
/*ARGSUSED*/
void
int icache_lsize)
{
}
#else /* lint */
#endif /* lint */
#if defined(lint)
void
flush_pcache(void)
{}
#else /* lint */
#endif /* lint */
#if defined(CPU_IMP_L1_CACHE_PARITY)
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* Get dcache data and tag. The Dcache data is a pointer to a ch_dc_data_t
* structure (see cheetahregs.h):
* The Dcache *should* be turned off when this code is executed.
*/
.align 128
b 1f
.align 128
1:
2:
blt 2b
/*
* Unlike other CPUs in the family, D$ data parity bits for Panther
* do not reside in the microtag. Instead, we have to read them
* using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead
* of just having 8 parity bits to protect all 32 bytes of data
* per line, we now have 32 bits of parity.
*/
bne 4f
/*
* move our pointer to the next field where we store parity bits
* and add the offset of the last parity byte since we will be
* storing all 4 parity bytes within one 64 bit field like this:
*
* +------+------------+------------+------------+------------+
* | - | DC_parity | DC_parity | DC_parity | DC_parity |
* | - | for word 3 | for word 2 | for word 1 | for word 0 |
* +------+------------+------------+------------+------------+
* 63:32 31:24 23:16 15:8 7:0
*/
/* add the DC_data_parity bit into our working index */
3:
blt 3b
4:
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* Get icache data and tag. The data argument is a pointer to a ch_ic_data_t
* structure (see cheetahregs.h):
* The Icache *Must* be turned off when this function is called.
* This is because diagnostic accesses to the Icache interfere with cache
* consistency.
*/
.align 128
2:
blt 2b
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* Get pcache data and tags.
* inputs:
* pcache_idx - fully constructed VA for accessing P$ diagnostic
* registers. Contains PC_way and PC_addr shifted into
* the correct bit positions. See the PRM for more details.
* data - pointer to a ch_pc_data_t
* structure (see cheetahregs.h):
*/
.align 128
2:
blt 2b
#endif /* lint */
#endif /* CPU_IMP_L1_CACHE_PARITY */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* re-enable the i$, d$, w$, and p$ according to bootup cache state.
* Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
* %o0 - 64 bit constant
*/
#endif /* lint */
#if defined(lint)
get_dcu(void)
{
return ((uint64_t)0);
}
#else /* lint */
/*
* Return DCU register.
*/
#endif /* lint */
/*
*
* This handler is used to check for softints generated by error trap
* handlers to report errors. On Cheetah, this mechanism is used by the
* Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
* ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
ch_pil15_interrupt_instr(void)
{}
#else /* lint */
#endif
#if defined(lint)
void
ch_pil15_interrupt(void)
{}
#else /* lint */
/*
* Since pil_interrupt is hacked to assume that every level 15
* interrupt is generated by the CPU to indicate a performance
* counter overflow this gets ugly. Before calling pil_interrupt
* the Error at TL>0 pending status is inspected. If it is
* non-zero, then an error has occurred and it is handled.
* Otherwise control is transferred to pil_interrupt. Note that if
* an error is detected pil_interrupt will not be called and
* overflow interrupts may be lost causing erroneous performance
* measurements. However, error-recovery will have a detrimental
* effect on performance anyway.
*/
/*
* We have a pending TL>0 error, clear the TL>0 pending status.
*/
/*
* Clear the softint.
*/
/*
* For Cheetah*, call cpu_tl1_error via systrap at PIL 15
* panic flag (%g2).
*/
1:
/*
* The logout is invalid.
*
* Call the default interrupt handler.
*/
#endif
/*
* Error Handling
*
* Cheetah provides error checking for all memory access paths between
* the CPU, External Cache, Cheetah Data Switch and system bus. Error
* information is logged in the AFSR, (also AFSR_EXT for Panther) and
* AFAR and one of the following traps is generated (provided that it
* is enabled in External Cache Error Enable Register) to handle that
* error:
* 1. trap 0x70: Precise trap
* tt0_fecc for errors at trap level(TL)>=0
* 2. trap 0x0A and 0x32: Deferred trap
* async_err for errors at TL>=0
* 3. trap 0x63: Disrupting trap
* ce_err for errors at TL=0
* (Note that trap 0x63 cannot happen at trap level > 0)
*
* Trap level one handlers panic the system except for the fast ecc
* error handler which tries to recover from certain errors.
*/
/*
* FAST ECC TRAP STRATEGY:
*
* Software must handle single and multi bit errors which occur due to data
* or instruction cache reads from the external cache. A single or multi bit
* error occurring in one of these situations results in a precise trap.
*
* The basic flow of this trap handler is as follows:
*
* 1) Record the state and then turn off the Dcache and Icache. The Dcache
* is disabled because bad data could have been installed. The Icache is
* turned off because we want to capture the Icache line related to the
* AFAR.
* 3) Park sibling core if caches are shared (to avoid race condition while
* accessing shared resources such as L3 data staging register during
* CPU logout).
* 4) Read the AFAR and AFSR.
* 5) If CPU logout structure is not being used, then:
* 6) Clear all errors from the AFSR.
* 7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
* 8) Flush Ecache then Flush Dcache and Icache and restore to previous
* state.
* 9) Unpark sibling core if we parked it earlier.
* 10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
* running at PIL 15.
* 6) Otherwise, if CPU logout structure is being used:
* 7) Increment the "logout busy count".
* 8) Flush Ecache then Flush Dcache and Icache and restore to previous
* state.
* 9) Unpark sibling core if we parked it earlier.
* 10) Issue a retry since the other CPU error logging code will end up
* finding this error bit and logging information about it later.
* 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
* yet initialized such that we can't even check the logout struct, then
* we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
* call cpu_fast_ecc_error via systrap. The clo_flags parameter is used
* to determine information such as TL, TT, CEEN and NCEEN settings, etc
* in the high level trap handler since we don't have access to detailed
* logout information in cases where the cpu_private struct is not yet
* initialized.
*
* We flush the E$ and D$ here on TL=1 code to prevent getting nested
* Fast ECC traps in the TL=0 code. If we get a Fast ECC event here in
* the TL=1 code, we will go to the Fast ECC at TL>0 handler which has
* a better chance of fixing things up than simply recursing through this code
* again (this would probably cause an eventual kernel stack overflow).
* If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
* can flush the E$ (or the error is a stuck-at bit), we will recurse in
* the Fast ECC at TL>0 handler and eventually Red Mode.
*
* Note that for Cheetah (and only Cheetah), we use alias addresses for
* flushing rather than ASI accesses (which don't exist on Cheetah).
* Should we encounter a Fast ECC error within this handler on Cheetah,
* there's a good chance it's within the ecache_flushaddr buffer (since
* it's the largest piece of memory we touch in the handler); the Fast ECC
* at TL>0 handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
*/
/*
* Cheetah ecc-protected E$ trap (Trap 70) at TL=0
* tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
* architecture-specific files.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
fecc_err_instr(void)
{}
#else /* lint */
/*
* Save current DCU state. Turn off the Dcache and Icache.
*/
#endif /* lint */
#if defined(lint)
void
fast_ecc_err(void)
{}
#else /* lint */
.section ".text"
.align 64
/*
* Turn off CEEN and NCEEN.
*/
/*
* Check to see whether we need to park our sibling core
* before recording diagnostic information from caches
* which may be shared by both cores.
* We use %g1 to store information about whether or not
* we had to park the core (%g1 holds our DCUCR value and
* we only use bits from that register which are "reserved"
* to keep track of core parking) so that we know whether
* or not to unpark later. %g5 and %g4 are scratch registers.
*/
/*
* Do the CPU log out capture.
* %g3 = "failed?" return value.
* %g2 = Input = AFAR. Output the clo_flags info which is passed
* into this macro via %g4. Output only valid if cpu_private
* struct has not been initialized.
* CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
* %g4 = Trap information stored in the cpu logout flags field
* %g5 = scr1
* %g6 = scr2
* %g3 = scr3
* %g4 = scr4
*/
/* store the CEEN and NCEEN values, TL=0 */
/*
* Flush the Ecache (and L2 cache for Panther) to get the error out
* of the Ecache. If the UCC or UCU is on a dirty line, then the
* following flush will turn that into a WDC or WDU, respectively.
*/
/*
* Flush the Dcache. Since bad data could have been installed in
* the Dcache we must flush it before re-enabling it.
*/
/*
* Flush the Icache. Since we turned off the Icache to capture the
* Icache line it is now stale or corrupted and we must flush it
* before re-enabling it.
*/
6:
/*
* check to see whether we parked our sibling core at the start
* of this handler. If so, we need to unpark it here.
* We use DCUCR reserved bits (stored in %g1) to keep track of
* whether or not we need to unpark. %g5 and %g4 are scratch registers.
*/
/*
* Restore the Dcache and Icache to the previous state.
*/
/*
* Make sure our CPU logout operation was successful.
*/
be 8f
/*
* If the logout structure had been busy, how many times have
* we tried to use it and failed (nesting count)? If we have
* already recursed a substantial number of times, then we can
* assume things are not going to get better by themselves and
* so it would be best to panic.
*/
blt 7f
7:
/*
* Otherwise, if the logout structure was busy but we have not
* nested more times than our maximum value, then we simply
* issue a retry. Our TL=0 trap handler code will check and
* clear the AFSR after it is done logging what is currently
* in the logout struct and handle this event at that time.
*/
8:
/*
* Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
* already at PIL 15.
*/
#endif /* lint */
#endif /* !(JALAPENO || SERRANO) */
/*
*
* The basic flow of this trap handler is as follows:
*
* 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
* software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
* will use to save %g1 and %g2.
* 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
* we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
* handler (using the just saved %g1).
* 3) Turn off the Dcache if it was on and save the state of the Dcache
* (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
* NB: we don't turn off the Icache because bad data is not installed nor
* will we be doing any diagnostic accesses.
* %tpc, %tnpc, %tstate values previously saved).
* 6) set %tl to %tl - 1.
* 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
* 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
* 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear. For
* Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
* Save the values in ch_err_tl1_data. For Panther, read the shadow
* AFSR_EXT and save the value in ch_err_tl1_data.
* 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
* 11) Flush the Ecache.
* NB: the Ecache is flushed assuming the largest possible size with
* the smallest possible line size since access to the cpu_nodes may
* cause an unrecoverable DTLB miss.
* 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
* For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
* read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
* 14) Flush and re-enable the Dcache if it was on at step 3.
* 15) Do TRAPTRACE if enabled.
* 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
* 17) Set the event pending flag in ch_err_tl1_pending[CPU]
* 18) Cause a softint 15. The pil15_interrupt handler will inspect the
* event pending flag and call cpu_tl1_error via systrap if set.
* 19) Restore the registers from step 5 and issue retry.
*/
/*
* Cheetah ecc-protected E$ trap (Trap 70) at TL>0
* tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
* architecture-specific files. This generates a "Software Trap 0" at TL>0,
* which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
fecc_err_tl1_instr(void)
{}
#else /* lint */
#endif /* lint */
/*
* Software trap 0 at TL>0.
* tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
* the various architecture-specific files. This is used as a continuation
* of the fast ecc handling where we've bought an extra TL level, so we can
* use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
* and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
* there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
* order two bits from %g1 and %g2 respectively).
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
fecc_err_tl1_cont_instr(void)
{}
#else /* lint */
#endif /* lint */
#if defined(lint)
void
ce_err(void)
{}
#else /* lint */
/*
* The ce_err function handles disrupting trap type 0x63 at TL=0.
*
* AFSR errors bits which cause this trap are:
* CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
*
* NCEEN Bit of Cheetah External Cache Error Enable Register enables
* the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
*
* CEEN Bit of Cheetah External Cache Error Enable Register enables
* the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
*
* Cheetah+ also handles (No additional processing required):
* DUE, DTO, DBERR (NCEEN controlled)
* THCE (CEEN and ET_ECC_en controlled)
* TUE (ET_ECC_en controlled)
*
* Panther further adds:
* IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled)
* IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled)
* TUE_SH, TUE (NCEEN and L2_tag_ECC_en controlled)
* L3_TUE, L3_TUE_SH (NCEEN and ET_ECC_en controlled)
* THCE (CEEN and L2_tag_ECC_en controlled)
* L3_THCE (CEEN and ET_ECC_en controlled)
*
* Steps:
* 1. Disable hardware corrected disrupting errors only (CEEN)
* 2. Park sibling core if caches are shared (to avoid race
* condition while accessing shared resources such as L3
* data staging register during CPU logout).
* 3. If the CPU logout structure is not currently being used:
* 4. Clear AFSR error bits
* 5. Capture Ecache, Dcache and Icache lines associated
* with AFAR.
* 6. Unpark sibling core if we parked it earlier.
* 7. call cpu_disrupting_error via sys_trap at PIL 14
* unless we're already running at PIL 15.
* 4. Otherwise, if the CPU logout structure is busy:
* 5. Increment "logout busy count" and place into %g3
* 6. Unpark sibling core if we parked it earlier.
* 7. Issue a retry since the other CPU error logging
* code will end up finding this error bit and logging
* information about it later.
* 5. Alternatively (to 3 and 4 above), if the cpu_private struct is
* not yet initialized such that we can't even check the logout
* struct, then we place the clo_flags data into %g2
* (sys_trap->have_win arg #1) and call cpu_disrupting_error via
* systrap. The clo_flags parameter is used to determine information
* such as TL, TT, CEEN settings, etc in the high level trap
* handler since we don't have access to detailed logout information
* in cases where the cpu_private struct is not yet initialized.
*
* %g3: [ logout busy count ] - arg #2
* %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
*/
.align 128
/*
* Disable trap on hardware corrected errors (CEEN) while at TL=0
* to prevent recursion.
*/
/*
* Save current DCU state. Turn off Icache to allow capture of
* Icache data by DO_CPU_LOGOUT.
*/
/*
* Check to see whether we need to park our sibling core
* before recording diagnostic information from caches
* which may be shared by both cores.
* We use %g1 to store information about whether or not
* we had to park the core (%g1 holds our DCUCR value and
* we only use bits from that register which are "reserved"
* to keep track of core parking) so that we know whether
* or not to unpark later. %g5 and %g4 are scratch registers.
*/
/*
* Do the CPU log out capture.
* %g3 = "failed?" return value.
* %g2 = Input = AFAR. Output the clo_flags info which is passed
* into this macro via %g4. Output only valid if cpu_private
* struct has not been initialized.
* CHPR_CECC_LOGOUT = cpu logout structure offset input
* %g4 = Trap information stored in the cpu logout flags field
* %g5 = scr1
* %g6 = scr2
* %g3 = scr3
* %g4 = scr4
*/
/*
* Flush the Icache. Since we turned off the Icache to capture the
* Icache line it is now stale or corrupted and we must flush it
* before re-enabling it.
*/
2:
/*
* check to see whether we parked our sibling core at the start
* of this handler. If so, we need to unpark it here.
* We use DCUCR reserved bits (stored in %g1) to keep track of
* whether or not we need to unpark. %g5 and %g4 are scratch registers.
*/
/*
* Restore Icache to previous state.
*/
/*
* Make sure our CPU logout operation was successful.
*/
be 4f
/*
* If the logout structure had been busy, how many times have
* we tried to use it and failed (nesting count)? If we have
* already recursed a substantial number of times, then we can
* assume things are not going to get better by themselves and
* so it would be best to panic.
*/
blt 3f
3:
/*
* Otherwise, if the logout structure was busy but we have not
* nested more times than our maximum value, then we simply
* issue a retry. Our TL=0 trap handler code will check and
* clear the AFSR after it is done logging what is currently
* in the logout struct and handle this event at that time.
*/
4:
/*
* Call cpu_disrupting_error via systrap at PIL 14 unless we're
* already at PIL 15.
*/
#endif /* lint */
#if defined(lint)
/*
* This trap cannot happen at TL>0 which means this routine will never
* actually be called and so we treat this like a BAD TRAP panic.
*/
void
ce_err_tl1(void)
{}
#else /* lint */
.align 64
#endif /* lint */
#if defined(lint)
void
async_err(void)
{}
#else /* lint */
/*
* The async_err function handles deferred trap types 0xA
* (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
*
* AFSR errors bits which cause this trap are:
* UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
* On some platforms, EMU may cause cheetah to pull the error pin
* never giving Solaris a chance to take a trap.
*
* NCEEN Bit of Cheetah External Cache Error Enable Register enables
* the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
*
* Steps:
* 1. Disable CEEN and NCEEN errors to prevent recursive errors.
* 2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
* I$ line in DO_CPU_LOGOUT.
* 3. Park sibling core if caches are shared (to avoid race
* condition while accessing shared resources such as L3
* data staging register during CPU logout).
* 4. If the CPU logout structure is not currently being used:
* 5. Clear AFSR error bits
* 6. Capture Ecache, Dcache and Icache lines associated
* with AFAR.
* 7. Unpark sibling core if we parked it earlier.
* 8. call cpu_deferred_error via sys_trap.
* 5. Otherwise, if the CPU logout structure is busy:
* 6. Increment "logout busy count"
* 7. Unpark sibling core if we parked it earlier.
* 8) Issue a retry since the other CPU error logging
* code will end up finding this error bit and logging
* information about it later.
* 6. Alternatively (to 4 and 5 above), if the cpu_private struct is
* not yet initialized such that we can't even check the logout
* struct, then we place the clo_flags data into %g2
* (sys_trap->have_win arg #1) and call cpu_deferred_error via
* systrap. The clo_flags parameter is used to determine information
* such as TL, TT, CEEN settings, etc in the high level trap handler
* since we don't have access to detailed logout information in cases
* where the cpu_private struct is not yet initialized.
*
* %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
* %g3: [ logout busy count ] - arg #2
*/
/*
* Disable CEEN and NCEEN.
*/
/*
* Save current DCU state.
* Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
* Do this regardless of whether this is a Data Access Error or
* Instruction Access Error Trap.
* Disable Dcache for both Data Access Error and Instruction Access
* Error per Cheetah PRM P.5 Note 6.
*/
/*
* Check to see whether we need to park our sibling core
* before recording diagnostic information from caches
* which may be shared by both cores.
* We use %g1 to store information about whether or not
* we had to park the core (%g1 holds our DCUCR value and
* we only use bits from that register which are "reserved"
* to keep track of core parking) so that we know whether
* or not to unpark later. %g6 and %g4 are scratch registers.
*/
/*
* Do the CPU logout capture.
*
* %g3 = "failed?" return value.
* %g2 = Input = AFAR. Output the clo_flags info which is passed
* into this macro via %g4. Output only valid if cpu_private
* struct has not been initialized.
* CHPR_ASYNC_LOGOUT = cpu logout structure offset input
* %g4 = Trap information stored in the cpu logout flags field
* %g5 = scr1
* %g6 = scr2
* %g3 = scr3
* %g4 = scr4
*/
/*
* If the logout struct was busy, we may need to pass the
* TT, TL, and CEEN information to the TL=0 handler via
* systrap parameter so save it off here.
*/
be 1f
1:
/*
* Flush the Icache. Since we turned off the Icache to capture the
* Icache line it is now stale or corrupted and we must flush it
* before re-enabling it.
*/
2:
/*
* XXX - Don't we need to flush the Dcache before turning it back
* on to avoid stale or corrupt data? Was this broken?
*/
/*
* Flush the Dcache before turning it back on since it may now
* contain stale or corrupt data.
*/
/*
* check to see whether we parked our sibling core at the start
* of this handler. If so, we need to unpark it here.
* We use DCUCR reserved bits (stored in %g1) to keep track of
* whether or not we need to unpark. %g5 and %g7 are scratch registers.
*/
/*
* Restore Icache and Dcache to previous state.
*/
/*
* Make sure our CPU logout operation was successful.
*/
be 4f
/*
* If the logout structure had been busy, how many times have
* we tried to use it and failed (nesting count)? If we have
* already recursed a substantial number of times, then we can
* assume things are not going to get better by themselves and
* so it would be best to panic.
*/
blt 3f
3:
/*
* Otherwise, if the logout structure was busy but we have not
* nested more times than our maximum value, then we simply
* issue a retry. Our TL=0 trap handler code will check and
* clear the AFSR after it is done logging what is currently
* in the logout struct and handle this event at that time.
*/
4:
#endif /* lint */
#if defined(CPU_IMP_L1_CACHE_PARITY)
/*
* D$ parity error trap (trap 71) at TL=0.
* tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
* the various architecture-specific files. This merely sets up the
* arguments for cpu_parity_error and calls it via sys_trap.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
dcache_parity_instr(void)
{}
#else /* lint */
#endif /* lint */
/*
* D$ parity error trap (trap 71) at TL>0.
* tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
* the various architecture-specific files. This generates a "Software
* Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
* continue the handling there.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
dcache_parity_tl1_instr(void)
{}
#else /* lint */
#endif /* lint */
/*
* Software trap 1 at TL>0.
* tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
* of the various architecture-specific files. This is used as a continuation
* of the dcache parity handling where we've bought an extra TL level, so we
* can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
* and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
* there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
* order two bits from %g1 and %g2 respectively).
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
{}
#else /* lint */
#endif /* lint */
/*
* D$ parity error at TL>0 handler
* We get here via trap 71 at TL>0->Software trap 1 at TL>0. We enter
* this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
*/
#if defined(lint)
void
dcache_parity_tl1_err(void)
{}
#else /* lint */
/*
* This macro saves all the %g registers in the ch_err_tl1_data
* structure, updates the ch_err_tl1_flags and saves the %tpc in
* ch_err_tl1_tpc. At the end of this macro, %g1 will point to
* the ch_err_tl1_data structure and %g2 will have the original
* flags in the ch_err_tl1_data structure. All %g registers
* except for %g1 and %g2 will be available.
*/
#ifdef TRAPTRACE
/*
* Get current trap trace entry physical pointer.
*/
/*
* Create trap trace entry.
*/
/*
* Advance trap trace pointer.
*/
#endif /* TRAPTRACE */
/*
* I$ and D$ are automatically turned off by HW when the CPU hits
* a dcache or icache parity error so we will just leave those two
* off for now to avoid repeating this trap.
* For Panther, however, since we trap on P$ data parity errors
* and HW does not automatically disable P$, we need to disable it
* here so that we don't encounter any recursive traps when we
* issue the retry.
*/
/*
* We fall into this macro if we've successfully logged the error in
* the ch_err_tl1_data structure and want the PIL15 softint to pick
* it up and log it. %g1 must point to the ch_err_tl1_data structure.
* Restores the %g registers and issues retry.
*/
#endif /* lint */
/*
* I$ parity error trap (trap 72) at TL=0.
* tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
* the various architecture-specific files. This merely sets up the
* arguments for cpu_parity_error and calls it via sys_trap.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
icache_parity_instr(void)
{}
#else /* lint */
#endif /* lint */
/*
* I$ parity error trap (trap 72) at TL>0.
* tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
* the various architecture-specific files. This generates a "Software
* Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
* continue the handling there.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
icache_parity_tl1_instr(void)
{}
#else /* lint */
#endif /* lint */
/*
* Software trap 2 at TL>0.
* tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
* of the various architecture-specific files. This is used as a continuation
* of the icache parity handling where we've bought an extra TL level, so we
* can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
* and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
* there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
* order two bits from %g1 and %g2 respectively).
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
/*
 * Lint stub for the I$ parity TL>0 continuation handler described in
 * the block comment above. The function-name line was missing here;
 * restored as icache_parity_tl1_cont_instr per that comment and the
 * parallel icache_parity_instr stub earlier in this file.
 */
void
icache_parity_tl1_cont_instr(void)
{}
#else /* lint */
#endif /* lint */
/*
* I$ parity error at TL>0 handler
* We get here via trap 72 at TL>0->Software trap 2 at TL>0. We enter
* this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
*/
#if defined(lint)
void
icache_parity_tl1_err(void)
{}
#else /* lint */
/*
* This macro saves all the %g registers in the ch_err_tl1_data
* structure, updates the ch_err_tl1_flags and saves the %tpc in
* ch_err_tl1_tpc. At the end of this macro, %g1 will point to
* the ch_err_tl1_data structure and %g2 will have the original
* flags in the ch_err_tl1_data structure. All %g registers
* except for %g1 and %g2 will be available.
*/
#ifdef TRAPTRACE
/*
* Get current trap trace entry physical pointer.
*/
/*
* Create trap trace entry.
*/
/*
* Advance trap trace pointer.
*/
#endif /* TRAPTRACE */
/*
* We fall into this macro if we've successfully logged the error in
* the ch_err_tl1_data structure and want the PIL15 softint to pick
* it up and log it. %g1 must point to the ch_err_tl1_data structure.
* Restores the %g registers and issues retry.
*/
#endif /* lint */
#endif /* CPU_IMP_L1_CACHE_PARITY */
/*
* The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
* tte, the virtual address, and the ctxnum of the specified tlb entry. They
* should only be used in places where you have no choice but to look at the
* tlb itself.
*
* Note: These two routines are required by the Estar "cpr" loadable module.
*/
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
#endif /* lint */
#if defined(lint)
get_safari_config(void)
{ return (0); }
#else /* lint */
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
#endif /* lint */
#endif /* !(JALAPENO || SERRANO) */
#if defined(lint)
void
cpu_cleartickpnt(void)
{}
#else /* lint */
/*
* Clear the NPT (non-privileged trap) bit in the %tick/%stick
* registers. In an effort to make the change in the
* counters as atomic as possible, we disable
* all interrupts while we're changing the registers. We also
* ensure that the read and write instructions are in the same
* line in the instruction cache.
*/
2:
/* read-modify-write of one counter register, clearing NPT bit */
/* (instructions elided in this view) */
1:
4:
/* read-modify-write of the other counter register, clearing NPT bit */
3:
#endif /* lint */
#if defined(CPU_IMP_L1_CACHE_PARITY)
#if defined(lint)
/*
* correct_dcache_parity(size_t size, size_t linesize)
*
* Correct D$ data parity by zeroing the data and initializing microtag
* for all indexes and all ways of the D$.
*
*/
/* ARGSUSED */
void
{}
#else /* lint */
/*
* Register Usage:
*
* %o0 = input D$ size
* %o1 = input D$ line size
* %o2 = scratch
* %o3 = scratch
* %o4 = scratch
*/
/*
* For Panther CPUs, we also need to clear the data parity bits
* using DC_data_parity bit of the ASI_DCACHE_DATA register.
*/
bne 1f ! not Panther: skip the parity-bit setup (compare and delay slot elided in this view)
1:
/*
* Set utag = way since it must be unique within an index.
*/
/*
* Zero line of D$ data (and data parity bits for Panther)
*/
2:
/*
* We also clear the parity bits if this is a panther. For non-Panther
* CPUs, we simply end up clearing the $data register twice.
*/
bge 2b ! inner loop: presumably next word within the D$ line — loop bounds elided in this view
bge 1b ! outer loop: presumably next index/way of the D$ — loop bounds elided in this view
#endif /* lint */
#endif /* CPU_IMP_L1_CACHE_PARITY */
#if defined(lint)
/*
* Get timestamp (stick).
*/
/* ARGSUSED */
void
{
}
#else /* lint */
#endif /* lint */
#if defined(lint)
/*
* Set STICK adjusted by skew.
*/
/* ARGSUSED */
void
{
}
#else /* lint */
.align 16
1: nop
#endif /* lint */
#if defined(lint)
/*
* Debugger-specific stick retrieval
*/
/*ARGSUSED*/
int
{
return (0);
}
#else /* lint */
#endif /* lint */
#if defined(lint)
/*
* Invalidate the specified line from the D$.
*
* Register usage:
* %o0 - index for the invalidation, specifies DC_way and DC_addr
*
* ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
* stored to a particular DC_way and DC_addr in ASI_DC_TAG.
*
* The format of the stored 64-bit value is:
*
* +----------+--------+----------+
* | Reserved | DC_tag | DC_valid |
* +----------+--------+----------+
* 63 31 30 1 0
*
* DC_tag is the 30-bit physical tag of the associated line.
* DC_valid is the 1-bit valid field for both the physical and snoop tags.
*
* The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
*
* +----------+--------+----------+----------+
* | Reserved | DC_way | DC_addr | Reserved |
* +----------+--------+----------+----------+
* 63 16 15 14 13 5 4 0
*
* DC_way is a 2-bit index that selects one of the 4 ways.
*
* Setting the DC_valid bit to zero for the specified DC_way and
* DC_addr index into the D$ results in an invalidation of a D$ line.
*/
/*ARGSUSED*/
void
dcache_inval_line(int index)
{
}
#else /* lint */
#endif /* lint */
#if defined(lint)
/*
* Invalidate the entire I$
*
* Register usage:
* %o0 - specifies IC_way, IC_addr, IC_tag
* %o1 - scratch
* %o2 - used to save and restore DCU value
* %o3 - scratch
* %o5 - used to save and restore PSTATE
*
* Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
* the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
* block out snoops and invalidates to the I$, causing I$ consistency
* to be broken. Before turning on the I$, all I$ lines must be invalidated.
*
* ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
* stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
* info below describes store (write) use of ASI_IC_TAG. Note that read
* use of ASI_IC_TAG behaves differently.
*
* The format of the stored 64-bit value is:
*
* +----------+--------+---------------+-----------+
* | Reserved | Valid | IC_vpred<7:0> | Undefined |
* +----------+--------+---------------+-----------+
* 63 55 54 53 46 45 0
*
* Valid is the 1-bit valid field for both the physical and snoop tags.
* IC_vpred is the 8-bit LPB bits for 8 instructions starting at
* the 32-byte boundary aligned address specified by IC_addr.
*
* The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
*
* +----------+--------+---------+--------+---------+
* | Reserved | IC_way | IC_addr | IC_tag |Reserved |
* +----------+--------+---------+--------+---------+
* 63 16 15 14 13 5 4 3 2 0
*
* IC_way is a 2-bit index that selects one of the 4 ways.
* IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
* IC_addr[5] is a "don't care" for a store.
* IC_tag set to 2 specifies that the stored value is to be interpreted
* as containing Valid and IC_vpred as described above.
*
* Setting the Valid bit to zero for the specified IC_way and
* IC_addr index into the I$ results in an invalidation of an I$ line.
*/
/*ARGSUSED*/
void
icache_inval_all(void)
{
}
#else /* lint */
2:
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{
}
#else /* lint */
/*
* cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
* crosstrap. It atomically increments the outstanding request counter and,
* if there was not already an outstanding request, branches to setsoftint_tl1
* to enqueue an intr_vec for the given inum.
*/
!
! Arguments:
!
! Internal:
!
! no need to use atomic instructions for the following
!
1:
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* Get the error state for the processor.
* Note that this must not be used at TL>0
*/
#if defined(CHEETAH_PLUS)
bne,a 1f
b 2f
1:
2:
#else /* CHEETAH_PLUS */
#endif /* CHEETAH_PLUS */
#if defined(SERRANO)
/*
* We save this in the afar2 of the register save area.
*/
#endif /* SERRANO */
#endif /* lint */
#if defined(lint)
/*
* Check a page of memory for errors.
*
* Load each 64 byte block from physical memory.
* Check AFSR after each load to see if an error
* was caused.
*
* Used to determine if a page contains
* CEs when CEEN is disabled.
*/
/*ARGSUSED*/
void
{}
#else /* lint */
!
!
1:
!
!
!
!
2:
#endif /* lint */
#if defined(lint)
/*
* Perform a cpu logout called from C. This is used where we did not trap
* for the error but still want to gather "what we can". Caller must make
* sure cpu private area exists and that the indicated logout area is free
* for use, and that we are unable to migrate cpus.
*/
/*ARGSUSED*/
void
{ }
#else
#endif /* lint */
#if defined(lint)
/*ARGSUSED*/
int
{ return (0); }
#else
1:
/*
* We're about to write a block full of either total garbage
* (not kernel data, don't worry) or user floating-point data
* (so it only _looks_ like garbage).
*/
1:
0:
1:
/*
* If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
* which deals with watchpoints. Otherwise, just return -1.
*/
1:
#endif /* lint */
#ifdef CHEETAHPLUS_ERRATUM_25
#if defined(lint)
/*
* Claim a chunk of physical address space.
*/
/*ARGSUSED*/
void
{}
#else /* lint */
1:
#endif /* lint */
#if defined(lint)
/*
* CPU feature initialization,
* turn BPE off,
* get device id.
*/
/*ARGSUSED*/
void
cpu_feature_init(void)
{}
#else /* lint */
1:
!
!
#ifdef CHEETAHPLUS_ERRATUM_34
!
!
#endif /* CHEETAHPLUS_ERRATUM_34 */
#endif /* lint */
#if defined(lint)
/*
* Copy a tsb entry atomically, from src to dest.
* src must be 128 bit aligned.
*/
/*ARGSUSED*/
void
{}
#else /* lint */
#endif /* lint */
#endif /* CHEETAHPLUS_ERRATUM_25 */
#ifdef CHEETAHPLUS_ERRATUM_34
#if defined(lint)
/*ARGSUSED*/
void
itlb_erratum34_fixup(void)
{}
#else /* lint */
!
! NOTE(review): most of itlb_erratum34_fixup is elided in this view;
! the surviving code below scans the ITLB, via the diagnostic access
! ASI, for a slot into which an entry can be displaced.
!
#ifdef DEBUG
#endif /* DEBUG */
1:
2:
!
! Start the search above index 0 — presumably index 0 holds the
! entry being relocated (TODO confirm) — since we
! don't want our relocated entry evicted later.
!
! We assume an unlocked or invalid entry can be found; if not,
! we have
! bigger problems.
!
set (1 << 3), %g3
3:
ldxa [%g3]ASI_ITLB_ACCESS, %o4 ! Load TTE from t16
!
! If this entry isn't valid, we'll choose to displace it (regardless
! of the lock bit).
!
cmp %o4, %g0 ! TTE is > 0 iff not valid
bge %xcc, 4f ! If invalid, go displace
andcc %o4, TTE_LCK_INT, %g0 ! Check for lock bit
bnz,a %icc, 3b ! If locked, look at next
add %g3, (1 << 3), %g3 ! entry (each entry is 8 bytes in the diagnostic ASI)
4:
!
! Reached with %g3 = byte offset of an invalid or unlocked entry;
! the displacement stores are elided in this view.
!
#endif /* lint */
#if defined(lint)
/*ARGSUSED*/
void
dtlb_erratum34_fixup(void)
{}
#else /* lint */
!
! NOTE(review): most of dtlb_erratum34_fixup is elided in this view;
! it appears to mirror itlb_erratum34_fixup above, but for the DTLB.
!
#ifdef DEBUG
#endif /* DEBUG */
1:
2:
!
! (search for a displacement slot — code elided in this view)
!
3:
!
! (candidate-entry validity/lock checks — code elided in this view)
!
4:
!
! A displacement slot was found (offset in %g3); explicitly reload
! the former index 0 entry here.
!
set MMU_TAG_ACCESS, %g4 ! %g4 = D-MMU tag access register VA
stxa %o2, [%g4]ASI_DMMU ! write %o2 (presumably the saved tag) to tag access
stxa %o1, [%g3]ASI_DTLB_ACCESS ! write %o1 (presumably the saved TTE) into slot %g3
membar #Sync ! complete the diagnostic ASI stores before returning
retl
wrpr %g0, %o3, %pstate ! Enable interrupts (restore %pstate saved in %o3; delay slot)
SET_SIZE(dtlb_erratum34_fixup)
#endif /* lint */
#endif /* CHEETAHPLUS_ERRATUM_34 */