cheetahasm.h revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _CHEETAHASM_H
#define _CHEETAHASM_H
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _ASM
/* BEGIN CSTYLED */
#define ASM_LD(reg, symbol) \
sethi %hi(symbol), reg; \
ld [reg + %lo(symbol)], reg
#define ASM_LDX(reg, symbol) \
sethi %hi(symbol), reg; \
ldx [reg + %lo(symbol)], reg
#define ASM_JMP(reg, symbol) \
sethi %hi(symbol), reg; \
jmp reg + %lo(symbol); \
nop
/*
* Macro for getting a pointer at a given offset from the 'cpu_private'
* ptr. The 'cpu_private' ptr is in the machcpu structure.
* off_reg: Register offset from 'cpu_private' ptr.
* scr1: Scratch, ptr is returned in this register.
* scr2: Scratch
* label: Label to branch to if cpu_private ptr is null/zero.
*/
#define GET_CPU_PRIVATE_PTR(off_reg, scr1, scr2, label) \
CPU_ADDR(scr1, scr2); \
ldn [scr1 + CPU_PRIVATE], scr1; \
cmp scr1, 0; \
be label; \
nop; \
add scr1, off_reg, scr1
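/*
 * Illustrative use of GET_CPU_PRIVATE_PTR (a sketch, not a real call
 * site; the offset constant and registers are assumptions):
 *
 *	GET_CPU_PRIVATE_PTR(CHPR_TL1_ERR_DATA, %g1, %g2, 1f);
 *	! fall-through: %g1 = cpu_private + CHPR_TL1_ERR_DATA
 *	...
 * 1:	! cpu_private was null; no per-CPU private area to use
 */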
/*
* Macro version of get_dcache_dtag. We use this macro in the
* CPU logout code. Since the Dcache is virtually indexed, only
* bits [12:5] of the AFAR can be used, so we need to search through
* 8 indexes (4 ways x 2 values of bit 13) in order to find the tag we want.
* afar: input AFAR, not modified.
* datap: input ptr to ch_dc_data_t, at end pts to end of ch_dc_data_t.
* scr1: scratch.
* scr2: scratch, will hold tag to look for.
* scr3: used for Dcache index, loops through 4 ways.
*/
#define GET_DCACHE_DTAG(afar, datap, scr1, scr2, scr3) \
set CH_DCACHE_IDX_MASK, scr3; \
and afar, scr3, scr3; \
srlx afar, CH_DCTAG_PA_SHIFT, scr2; \
b 1f; \
or scr2, CH_DCTAG_VALID_BIT, scr2; /* tag we want */ \
.align 128; \
1: \
ldxa [scr3]ASI_DC_TAG, scr1; /* read tag */ \
cmp scr1, scr2; \
bne 4f; /* not found? */ \
nop; \
stxa scr3, [datap + CH_DC_IDX]%asi; /* store index */ \
stxa scr1, [datap + CH_DC_TAG]%asi; /* store tag */ \
membar #Sync; /* Cheetah PRM 10.6.3 */ \
ldxa [scr3]ASI_DC_UTAG, scr1; /* read utag */ \
membar #Sync; /* Cheetah PRM 10.6.3 */ \
stxa scr1, [datap + CH_DC_UTAG]%asi; \
ldxa [scr3]ASI_DC_SNP_TAG, scr1; /* read snoop tag */ \
stxa scr1, [datap + CH_DC_SNTAG]%asi; \
add datap, CH_DC_DATA, datap; \
clr scr2; \
2: \
membar #Sync; /* Cheetah PRM 10.6.1 */ \
ldxa [scr3 + scr2]ASI_DC_DATA, scr1; /* read data */ \
membar #Sync; /* Cheetah PRM 10.6.1 */ \
stxa scr1, [datap]%asi; \
add datap, 8, datap; \
cmp scr2, CH_DC_DATA_REG_SIZE - 8; \
blt 2b; \
add scr2, 8, scr2; \
\
GET_CPU_IMPL(scr2); /* Parity bits are elsewhere for */ \
cmp scr2, PANTHER_IMPL; /* panther processors. */ \
bne,a 5f; /* Done if not panther. */ \
add datap, 8, datap; /* Skip to the end of the struct. */ \
clr scr2; \
add datap, 7, datap; /* offset of the last parity byte */ \
mov 1, scr1; \
sll scr1, PN_DC_DATA_PARITY_BIT_SHIFT, scr1; \
or scr3, scr1, scr3; /* add DC_data_parity bit to index */ \
3: \
membar #Sync; /* Cheetah PRM 10.6.1 */ \
ldxa [scr3 + scr2]ASI_DC_DATA, scr1; /* read parity bits */ \
membar #Sync; /* Cheetah PRM 10.6.1 */ \
stba scr1, [datap]%asi; \
dec datap; \
cmp scr2, CH_DC_DATA_REG_SIZE - 8; \
blt 3b; \
add scr2, 8, scr2; \
b 5f; \
add datap, 5, datap; /* set pointer to end of our struct */ \
4: \
set CH_DCACHE_IDX_INCR, scr1; /* incr. idx (scr3) */ \
add scr3, scr1, scr3; \
set CH_DCACHE_IDX_LIMIT, scr1; /* done? */ \
cmp scr3, scr1; \
blt 1b; \
nop; \
add datap, CH_DC_DATA_SIZE, datap; \
5:
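/*
 * Worked sketch of the search above, assuming the usual Cheetah-class
 * 64KB, 4-way, 32-byte-line D$ (so each 16KB way is indexed by bits
 * [13:5]): the AFAR is physical, so only the page-offset bits [12:0]
 * are guaranteed to equal the VA's; bits [12:5] seed the index and
 * bit 13 must be tried both ways.  The loop at 4: therefore steps
 * scr3 by CH_DCACHE_IDX_INCR through all 8 candidates (4 ways x 2
 * values of bit 13) up to CH_DCACHE_IDX_LIMIT, comparing each tag
 * read via ASI_DC_TAG against the wanted tag in scr2.
 */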
/*
* Macro version of get_icache_dtag. We use this macro in the CPU
* logout code. If the Icache is on, we don't want to capture the data.
* afar: input AFAR, not modified.
* datap: input ptr to ch_ic_data_t, at end pts to end of ch_ic_data_t.
* scr1: scratch.
* scr2: scratch, will hold tag to look for.
* scr3: used for Icache index, loops through 4 ways.
* Note: For Panther, the Icache is virtually indexed and increases in
* size to 64KB (instead of 32KB) with a line size of 64 bytes (instead
* of 32). This means the IC_addr index bits[14:7] for Panther now
* correspond to VA bits[13:6]. But since it is virtually indexed, we
* still mask out only bits[12:5] from the AFAR (we have to manually
* check bit 13). In order to make this code work for all processors,
* we end up checking twice as many indexes (8 instead of 4) as required
* for non-Panther CPUs and saving off twice as much data (16 instructions
* instead of just 8).
*/
#define GET_ICACHE_DTAG(afar, datap, scr1, scr2, scr3) \
ldxa [%g0]ASI_DCU, scr1; \
btst DCU_IC, scr1; /* is Icache enabled? */ \
bne,a 6f; /* yes, don't capture */ \
add datap, CH_IC_DATA_SIZE, datap; /* annulled if no branch */ \
GET_CPU_IMPL(scr2); /* Panther only uses VA[13:6] */ \
cmp scr2, PANTHER_IMPL; /* and we also want to mask */ \
be 1f; /* out bit 13 since the */ \
nop; /* Panther I$ is VIPT. */ \
set CH_ICACHE_IDX_MASK, scr3; \
b 2f; \
nop; \
1: \
set PN_ICACHE_VA_IDX_MASK, scr3; \
2: \
and afar, scr3, scr3; \
sllx scr3, CH_ICACHE_IDX_SHIFT, scr3; \
srlx afar, CH_ICPATAG_SHIFT, scr2; /* pa tag we want */ \
andn scr2, CH_ICPATAG_LBITS, scr2; /* mask off lower */ \
b 3f; \
nop; \
.align 128; \
3: \
ldxa [scr3]ASI_IC_TAG, scr1; /* read pa tag */ \
andn scr1, CH_ICPATAG_LBITS, scr1; /* mask off lower */ \
cmp scr1, scr2; \
bne 5f; /* not found? */ \
nop; \
stxa scr3, [datap + CH_IC_IDX]%asi; /* store index */ \
stxa scr1, [datap + CH_IC_PATAG]%asi; /* store pa tag */ \
add scr3, CH_ICTAG_UTAG, scr3; /* read utag */ \
ldxa [scr3]ASI_IC_TAG, scr1; \
add scr3, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), scr3; \
stxa scr1, [datap + CH_IC_UTAG]%asi; \
ldxa [scr3]ASI_IC_TAG, scr1; /* read upper tag */ \
add scr3, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), scr3; \
stxa scr1, [datap + CH_IC_UPPER]%asi; \
ldxa [scr3]ASI_IC_TAG, scr1; /* read lower tag */ \
andn scr3, CH_ICTAG_TMASK, scr3; \
stxa scr1, [datap + CH_IC_LOWER]%asi; \
ldxa [scr3]ASI_IC_SNP_TAG, scr1; /* read snoop tag */ \
stxa scr1, [datap + CH_IC_SNTAG]%asi; \
add datap, CH_IC_DATA, datap; \
clr scr2; \
4: \
ldxa [scr3 + scr2]ASI_IC_DATA, scr1; /* read ins. data */ \
stxa scr1, [datap]%asi; \
add datap, 8, datap; \
cmp scr2, PN_IC_DATA_REG_SIZE - 8; \
blt 4b; \
add scr2, 8, scr2; \
b 6f; \
nop; \
5: \
set CH_ICACHE_IDX_INCR, scr1; /* incr. idx (scr3) */ \
add scr3, scr1, scr3; \
set PN_ICACHE_IDX_LIMIT, scr1; /* done? */ \
cmp scr3, scr1; \
blt 3b; \
nop; \
add datap, CH_IC_DATA_SIZE, datap; \
6:
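/*
 * Sketch of the index selection above: PN_ICACHE_VA_IDX_MASK (or
 * CH_ICACHE_IDX_MASK on non-Panther) keeps the AFAR bits that are
 * valid VA index bits, and the sllx by CH_ICACHE_IDX_SHIFT lines them
 * up with the IC_addr field of the ASI_IC_TAG diagnostic address.
 * The loop at 3: advances by CH_ICACHE_IDX_INCR up to
 * PN_ICACHE_IDX_LIMIT to cover the ways plus the manually checked
 * bit 13 (the 8 indexes described above), and the data loop at 4:
 * saves PN_IC_DATA_REG_SIZE bytes per matching line.
 */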
#if defined(JALAPENO) || defined(SERRANO)
/*
* Macro version of get_ecache_dtag. We use this macro in the
* CPU logout code.
* afar: input AFAR, not modified
* datap: Ptr to ch_ec_data_t, at end pts just past ch_ec_data_t.
* ec_way: Constant value (way number)
* scr1: Scratch
* scr2: Scratch.
* scr3: Scratch.
*/
#define GET_ECACHE_DTAG(afar, datap, ec_way, scr1, scr2, scr3) \
mov ec_way, scr1; \
and scr1, JP_ECACHE_NWAY - 1, scr1; /* mask E$ way bits */ \
sllx scr1, JP_EC_TAG_DATA_WAY_SHIFT, scr1; \
set ((JP_ECACHE_MAX_SIZE / JP_ECACHE_NWAY) - 1), scr2; \
and afar, scr2, scr3; /* get set offset */ \
andn scr3, (JP_ECACHE_MAX_LSIZE - 1), scr3; /* VA<5:0>=0 */ \
or scr3, scr1, scr3; /* or WAY bits */ \
b 1f; \
stxa scr3, [datap + CH_EC_IDX]%asi; /* store E$ index */ \
.align 64; \
1: \
JP_EC_DIAG_ACCESS_MEMBAR; \
ldxa [scr3]ASI_EC_DIAG, scr1; /* get E$ tag */ \
JP_EC_DIAG_ACCESS_MEMBAR; \
stxa scr1, [datap + CH_EC_TAG]%asi; \
add datap, CH_EC_DATA, datap; \
2: \
ldxa [scr3]ASI_EC_R, %g0; /* ld E$ stging regs */ \
clr scr1; \
3: /* loop thru 5 regs */ \
ldxa [scr1]ASI_EC_DATA, scr2; \
stxa scr2, [datap]%asi; \
add datap, 8, datap; \
cmp scr1, CH_ECACHE_STGREG_TOTALSIZE - 8; \
bne 3b; \
add scr1, 8, scr1; \
btst CH_ECACHE_STGREG_SIZE, scr3; /* done? */ \
beq 2b; \
add scr3, CH_ECACHE_STGREG_SIZE, scr3
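/*
 * Breakdown of the diagnostic index built above (derived from the
 * instructions, restated for readability):
 *	index = (afar & ((JP_ECACHE_MAX_SIZE / JP_ECACHE_NWAY) - 1))
 *		with the line-offset bits (JP_ECACHE_MAX_LSIZE - 1) cleared,
 *		OR'd with (ec_way & (JP_ECACHE_NWAY - 1))
 *			<< JP_EC_TAG_DATA_WAY_SHIFT
 * The loop at 2:/3: then loads each CH_ECACHE_STGREG_SIZE sub-block
 * into the five E$ staging registers and copies them out 8 bytes at
 * a time via ASI_EC_DATA.
 */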
#define GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3) \
GET_ECACHE_DTAG(afar, datap, 0, scr1, scr2, scr3); \
GET_ECACHE_DTAG(afar, datap, 1, scr1, scr2, scr3); \
GET_ECACHE_DTAG(afar, datap, 2, scr1, scr2, scr3); \
GET_ECACHE_DTAG(afar, datap, 3, scr1, scr2, scr3); \
add datap, (CHD_EC_DATA_SETS-4)*CH_EC_DATA_SIZE, datap; \
add datap, CH_EC_DATA_SIZE * PN_L2_NWAYS, datap
/*
* Jalapeno does not have cores so these macros are null.
*/
#define PARK_SIBLING_CORE(dcucr_reg, scr1, scr2)
#define UNPARK_SIBLING_CORE(dcucr_reg, scr1, scr2)
#if defined(JALAPENO)
/*
* Jalapeno gets primary AFSR and AFAR. All bits in the AFSR except
* the fatal error bits are cleared.
* datap: pointer to cpu logout structure.
* afar: returned primary AFAR value.
* scr1: scratch
* scr2: scratch
*/
#define GET_AFSR_AFAR(datap, afar, scr1, scr2) \
ldxa [%g0]ASI_AFAR, afar; \
stxa afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi; \
ldxa [%g0]ASI_AFSR, scr2; \
stxa scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR)]%asi; \
sethi %hh(C_AFSR_FATAL_ERRS), scr1; \
sllx scr1, 32, scr1; \
bclr scr1, scr2; /* Clear fatal error bits here, so */ \
stxa scr2, [%g0]ASI_AFSR; /* they're left as is in AFSR */ \
membar #Sync
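/*
 * Note on the sethi/sllx/bclr sequence above: %hh(x) extracts bits
 * <63:42> of x, sethi deposits them at <31:10>, and the sllx by 32
 * moves them to <63:42>, rebuilding the high-order C_AFSR_FATAL_ERRS
 * mask in a register.  Since AFSR bits are write-one-to-clear,
 * clearing the fatal bits in the value written back leaves exactly
 * those bits set in the hardware AFSR while all other logged status
 * bits are cleared.  The same pattern recurs in the other
 * GET_AFSR_AFAR variants below.
 */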
/*
* Jalapeno has no shadow AFAR, null operation.
*/
#define GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3)
#elif defined(SERRANO)
/*
* Serrano gets primary AFSR and AFAR. All bits in the AFSR except
* the fatal error bits are cleared. For Serrano, we also save the
* AFAR2 register.
* datap: pointer to cpu logout structure.
* afar: returned primary AFAR value.
* scr1: scratch
* scr2: scratch
*/
#define GET_AFSR_AFAR(datap, afar, scr1, scr2) \
set ASI_MCU_AFAR2_VA, scr1; \
ldxa [scr1]ASI_MCU_CTRL, afar; \
stxa afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR2)]%asi; \
ldxa [%g0]ASI_AFAR, afar; \
stxa afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi; \
ldxa [%g0]ASI_AFSR, scr2; \
stxa scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR)]%asi; \
sethi %hh(C_AFSR_FATAL_ERRS), scr1; \
sllx scr1, 32, scr1; \
bclr scr1, scr2; /* Clear fatal error bits here, so */ \
stxa scr2, [%g0]ASI_AFSR; /* they're left as is in AFSR */ \
membar #Sync
/*
* Serrano needs to capture E$, D$ and I$ lines associated with afar2.
* afar: scratch, holds afar2.
* datap: pointer to cpu logout structure
* scr1: scratch
* scr2: scratch
* scr3: scratch
*/
#define GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3) \
ldxa [datap + (CH_CLO_DATA + CH_CHD_AFAR2)]%asi, afar; \
add datap, CH_CLO_SDW_DATA + CH_CHD_EC_DATA, datap; \
GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3); \
GET_DCACHE_DTAG(afar, datap, scr1, scr2, scr3); \
GET_ICACHE_DTAG(afar, datap, scr1, scr2, scr3); \
sub datap, CH_CPU_LOGOUT_SIZE, datap
#endif /* SERRANO */
#elif defined(CHEETAH_PLUS)
/*
* Macro version of get_ecache_dtag. We use this macro in the
* CPU logout code.
* afar: input AFAR, not modified.
* datap: Ptr to ch_ec_data_t, at end pts just past ch_ec_data_t.
* pn_way: ecache way for panther (value = 0-3). For non-panther
* cpus, this macro will be called with pn_way = 0.
* scr1: Scratch.
* scr2: Scratch.
* scr3: Scratch.
*/
#define GET_ECACHE_DTAG(afar, datap, pn_way, scr1, scr2, scr3) \
mov afar, scr3; \
andn scr3, (CH_ECACHE_SUBBLK_SIZE - 1), scr3; /* VA<5:0>=0 */\
set (CH_ECACHE_8M_SIZE - 1), scr2; \
and scr3, scr2, scr3; /* VA<63:23>=0 */ \
mov pn_way, scr1; /* panther L3$ is 4-way so we ... */ \
sllx scr1, PN_L3_WAY_SHIFT, scr1; /* need to mask... */ \
or scr3, scr1, scr3; /* in the way bits <24:23>. */ \
b 1f; \
stxa scr3, [datap + CH_EC_IDX]%asi; /* store E$ index */ \
.align 64; \
1: \
ldxa [scr3]ASI_EC_DIAG, scr1; /* get E$ tag */ \
stxa scr1, [datap + CH_EC_TAG]%asi; \
set CHP_ECACHE_IDX_TAG_ECC, scr1; \
or scr3, scr1, scr1; \
ldxa [scr1]ASI_EC_DIAG, scr1; /* get E$ tag ECC */ \
stxa scr1, [datap + CH_EC_TAG_ECC]%asi; \
add datap, CH_EC_DATA, datap; \
2: \
ldxa [scr3]ASI_EC_R, %g0; /* ld E$ stging regs */ \
clr scr1; \
3: /* loop thru 5 regs */ \
ldxa [scr1]ASI_EC_DATA, scr2; \
stxa scr2, [datap]%asi; \
add datap, 8, datap; \
cmp scr1, CH_ECACHE_STGREG_TOTALSIZE - 8; \
bne 3b; \
add scr1, 8, scr1; \
btst CH_ECACHE_STGREG_SIZE, scr3; /* done? */ \
beq 2b; \
add scr3, CH_ECACHE_STGREG_SIZE, scr3
/*
* If this is a panther, we need to make sure the sibling core is
* parked so that we avoid any race conditions during diagnostic
* accesses to the shared L2 and L3 caches.
* dcucr_reg: This register will be used to keep track of whether
* or not we need to unpark the core later.
* It just so happens that we also use this same register
* to keep track of our saved DCUCR value so we only touch
* bit 4 of the register (which is a "reserved" bit in the
* DCUCR) for keeping track of core parking.
* scr1: Scratch register.
* scr2: Scratch register.
*/
#define PARK_SIBLING_CORE(dcucr_reg, scr1, scr2) \
GET_CPU_IMPL(scr1); \
cmp scr1, PANTHER_IMPL; /* only park for panthers */ \
bne,a %xcc, 2f; \
andn dcucr_reg, PN_PARKED_OTHER_CORE, dcucr_reg; \
set ASI_CORE_RUNNING_STATUS, scr1; /* check other core */ \
ldxa [scr1]ASI_CMP_SHARED, scr2; /* is it running? */ \
cmp scr2, PN_BOTH_CORES_RUNNING; \
bne,a %xcc, 2f; /* if not running, we are done */ \
andn dcucr_reg, PN_PARKED_OTHER_CORE, dcucr_reg; \
or dcucr_reg, PN_PARKED_OTHER_CORE, dcucr_reg; \
set ASI_CORE_ID, scr1; \
ldxa [scr1]ASI_CMP_PER_CORE, scr2; \
and scr2, COREID_MASK, scr2; \
or %g0, 1, scr1; /* find out which core... */ \
sll scr1, scr2, scr2; /* ... we need to park... */ \
1: \
set ASI_CORE_RUNNING_RW, scr1; \
stxa scr2, [scr1]ASI_CMP_SHARED; /* ... and park it. */ \
membar #Sync; /* spin until... */ \
ldxa [scr1]ASI_CMP_SHARED, scr1; /* ...the other... */ \
cmp scr1, scr2; /* ...core is parked according to... */ \
bne,a %xcc, 1b; /* ...the core running status reg. */ \
nop; \
2:
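/*
 * The parking handshake above as pseudo-code (a sketch that mirrors
 * the instructions; register roles as documented):
 *
 *	if (impl != PANTHER_IMPL ||
 *	    core_running_status != PN_BOTH_CORES_RUNNING) {
 *		dcucr_reg &= ~PN_PARKED_OTHER_CORE;	// nothing to park
 *	} else {
 *		dcucr_reg |= PN_PARKED_OTHER_CORE;	// remember to unpark
 *		mask = 1 << our_core_id;	// leave only this core running
 *		do {
 *			core_running_rw = mask;
 *		} while (core_running_rw != mask);	// wait for the park
 *	}
 */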
/*
* The core running this code will unpark its sibling core if the
* sibling core had been parked by the current core earlier in this
* trap handler.
* dcucr_reg: This register is used to keep track of whether or not
* we need to unpark our sibling core.
* It just so happens that we also use this same register
* to keep track of our saved DCUCR value so we only touch
* bit 4 of the register (which is a "reserved" bit in the
* DCUCR) for keeping track of core parking.
* scr1: Scratch register.
* scr2: Scratch register.
*/
#define UNPARK_SIBLING_CORE(dcucr_reg, scr1, scr2) \
btst PN_PARKED_OTHER_CORE, dcucr_reg; \
bz,pt %xcc, 1f; /* if nothing to unpark, we are done */ \
andn dcucr_reg, PN_PARKED_OTHER_CORE, dcucr_reg; \
set ASI_CORE_RUNNING_RW, scr1; \
set PN_BOTH_CORES_RUNNING, scr2; /* we want both... */ \
stxa scr2, [scr1]ASI_CMP_SHARED; /* ...cores running. */ \
membar #Sync; \
1:
/*
* Cheetah+ and Jaguar get both primary and secondary AFSR/AFAR. All bits
* in the primary AFSR are cleared except the fatal error bits. For Panther,
* we also have to read and clear the AFSR_EXT, again leaving the fatal
* error bits alone.
* datap: pointer to cpu logout structure.
* afar: returned primary AFAR value.
* scr1: scratch
* scr2: scratch
*/
#define GET_AFSR_AFAR(datap, afar, scr1, scr2) \
set ASI_SHADOW_REG_VA, scr1; \
ldxa [scr1]ASI_AFAR, scr2; \
stxa scr2, [datap + (CH_CLO_SDW_DATA + CH_CHD_AFAR)]%asi; \
ldxa [scr1]ASI_AFSR, scr2; \
stxa scr2, [datap + (CH_CLO_SDW_DATA + CH_CHD_AFSR)]%asi; \
ldxa [%g0]ASI_AFAR, afar; \
stxa afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi; \
ldxa [%g0]ASI_AFSR, scr2; \
stxa scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR)]%asi; \
sethi %hh(C_AFSR_FATAL_ERRS), scr1; \
sllx scr1, 32, scr1; \
bclr scr1, scr2; /* Clear fatal error bits here, so */ \
stxa scr2, [%g0]ASI_AFSR; /* they're left as is in AFSR */ \
membar #Sync; \
GET_CPU_IMPL(scr1); \
cmp scr1, PANTHER_IMPL; \
bne %xcc, 1f; \
nop; \
set ASI_SHADOW_AFSR_EXT_VA, scr1; /* shadow AFSR_EXT */ \
ldxa [scr1]ASI_AFSR, scr2; \
stxa scr2, [datap + (CH_CLO_SDW_DATA + CH_CHD_AFSR_EXT)]%asi; \
set ASI_AFSR_EXT_VA, scr1; /* primary AFSR_EXT */ \
ldxa [scr1]ASI_AFSR, scr2; \
stxa scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR_EXT)]%asi; \
set C_AFSR_EXT_FATAL_ERRS, scr1; \
bclr scr1, scr2; /* Clear fatal error bits here, */ \
set ASI_AFSR_EXT_VA, scr1; /* so they're left */ \
stxa scr2, [scr1]ASI_AFSR; /* as is in AFSR_EXT */ \
membar #Sync; \
1:
/*
* This macro is used in the CPU logout code to capture diagnostic
* information from the L2 cache on panther processors.
* afar: input AFAR, not modified.
* datap: Ptr to pn_l2_data_t, at end pts just past pn_l2_data_t.
* scr1: Scratch.
* scr2: Scratch.
* scr3: Scratch.
*/
#define GET_PN_L2_CACHE_DTAGS(afar, datap, scr1, scr2, scr3) \
mov afar, scr3; \
set PN_L2_INDEX_MASK, scr1; \
and scr3, scr1, scr3; \
b 1f; /* code to read tags and data should be ... */ \
nop; /* ...on the same cache line if possible. */ \
.align 128; /* update this line if you add lines below. */ \
1: \
stxa scr3, [datap + CH_EC_IDX]%asi; /* store L2$ index */ \
ldxa [scr3]ASI_L2_TAG, scr1; /* read the L2$ tag */ \
stxa scr1, [datap + CH_EC_TAG]%asi; \
add datap, CH_EC_DATA, datap; \
clr scr1; \
2: \
ldxa [scr3 + scr1]ASI_L2_DATA, scr2; /* loop through */ \
stxa scr2, [datap]%asi; /* <511:256> of L2 */ \
add datap, 8, datap; /* data and record */ \
cmp scr1, (PN_L2_LINESIZE / 2) - 8; /* it in the cpu */ \
bne 2b; /* logout struct. */ \
add scr1, 8, scr1; \
set PN_L2_DATA_ECC_SEL, scr2; /* ECC_sel bit. */ \
ldxa [scr3 + scr2]ASI_L2_DATA, scr2; /* Read and record */ \
stxa scr2, [datap]%asi; /* ecc of <511:256> */ \
add datap, 8, datap; \
3: \
ldxa [scr3 + scr1]ASI_L2_DATA, scr2; /* loop through */ \
stxa scr2, [datap]%asi; /* <255:0> of L2 */ \
add datap, 8, datap; /* data and record */ \
cmp scr1, PN_L2_LINESIZE - 8; /* it in the cpu */ \
bne 3b; /* logout struct. */ \
add scr1, 8, scr1; \
set PN_L2_DATA_ECC_SEL, scr2; /* ECC_sel bit. */ \
add scr2, PN_L2_ECC_LO_REG, scr2; \
ldxa [scr3 + scr2]ASI_L2_DATA, scr2; /* Read and record */ \
stxa scr2, [datap]%asi; /* ecc of <255:0>. */ \
add datap, 8, datap; /* Advance pointer */ \
set PN_L2_SET_SIZE, scr2; \
set PN_L2_MAX_SET, scr1; \
cmp scr1, scr3; /* more ways to try for this line? */ \
bg,a %xcc, 1b; /* if so, start over with next way */ \
add scr3, scr2, scr3
/*
* For Cheetah+ we assume the E$ is 2-way and grab both E$ lines
* associated with afar.
* afar: AFAR from access.
* datap: pointer to cpu logout structure.
* scr1: scratch
* scr2: scratch
* scr3: scratch
*/
#define GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3) \
GET_CPU_IMPL(scr1); \
cmp scr1, PANTHER_IMPL; \
bne %xcc, 4f; \
nop; \
GET_ECACHE_DTAG(afar, datap, 0, scr1, scr2, scr3); \
GET_ECACHE_DTAG(afar, datap, 1, scr1, scr2, scr3); \
GET_ECACHE_DTAG(afar, datap, 2, scr1, scr2, scr3); \
GET_ECACHE_DTAG(afar, datap, 3, scr1, scr2, scr3); \
add datap, (CHD_EC_DATA_SETS-4)*CH_EC_DATA_SIZE, datap; \
GET_PN_L2_CACHE_DTAGS(afar, datap, scr1, scr2, scr3); \
b 5f; \
nop; \
4: \
GET_ECACHE_DTAG(afar, datap, 0, scr1, scr2, scr3); \
GET_ECACHE_WAY_BIT(scr1, scr2); \
xor afar, scr1, afar; \
GET_ECACHE_DTAG(afar, datap, 0, scr1, scr2, scr3); \
GET_ECACHE_WAY_BIT(scr1, scr2); /* restore AFAR */ \
xor afar, scr1, afar; \
add datap, (CHD_EC_DATA_SETS-2)*CH_EC_DATA_SIZE, datap; \
add datap, CH_EC_DATA_SIZE * PN_L2_NWAYS, datap; \
5:
/*
* Cheetah+ needs to capture E$, D$ and I$ lines associated with
* shadow afar.
* afar: scratch, holds shadow afar.
* datap: pointer to cpu logout structure
* scr1: scratch
* scr2: scratch
* scr3: scratch
*/
#define GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3) \
ldxa [datap + (CH_CLO_SDW_DATA + CH_CHD_AFAR)]%asi, afar; \
add datap, CH_CLO_SDW_DATA + CH_CHD_EC_DATA, datap; \
GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3); \
GET_DCACHE_DTAG(afar, datap, scr1, scr2, scr3); \
GET_ICACHE_DTAG(afar, datap, scr1, scr2, scr3); \
sub datap, CH_CPU_LOGOUT_SIZE, datap
/*
* Compute the "Way" bit for 2-way Ecache for Cheetah+.
*/
#define GET_ECACHE_WAY_BIT(scr1, scr2) \
CPU_INDEX(scr1, scr2); \
mulx scr1, CPU_NODE_SIZE, scr1; \
add scr1, ECACHE_SIZE, scr1; \
set cpunodes, scr2; \
ld [scr1 + scr2], scr1; \
srlx scr1, 1, scr1
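/*
 * Example (sizes illustrative): for an 8MB 2-way E$,
 * cpunodes[cpuid].ecache_size = 0x800000 and the srlx by 1 yields
 * 0x400000, the bit that selects between the two ways.
 * GET_ECACHE_DTAGS above XORs this bit into the AFAR to capture the
 * alternate way's line, then XORs it again to restore the original
 * AFAR.
 */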
#else /* CHEETAH_PLUS */
/*
* Macro version of get_ecache_dtag. We use this macro in the
* CPU logout code.
* afar: input AFAR, not modified.
* datap: Ptr to ch_ec_data_t, at end pts just past ch_ec_data_t.
* scr1: Scratch.
* scr2: Scratch.
* scr3: Scratch.
*/
#define GET_ECACHE_DTAG(afar, datap, scr1, scr2, scr3) \
mov afar, scr3; \
andn scr3, (CH_ECACHE_SUBBLK_SIZE - 1), scr3; /* VA<5:0>=0 */\
set (CH_ECACHE_8M_SIZE - 1), scr2; \
and scr3, scr2, scr3; /* VA<63:23>=0 */ \
b 1f; \
stxa scr3, [datap + CH_EC_IDX]%asi; /* store E$ index */ \
.align 64; \
1: \
ldxa [scr3]ASI_EC_DIAG, scr1; /* get E$ tag */ \
stxa scr1, [datap + CH_EC_TAG]%asi; \
add datap, CH_EC_DATA, datap; \
2: \
ldxa [scr3]ASI_EC_R, %g0; /* ld E$ stging regs */ \
clr scr1; \
3: /* loop thru 5 regs */ \
ldxa [scr1]ASI_EC_DATA, scr2; \
stxa scr2, [datap]%asi; \
add datap, 8, datap; \
cmp scr1, CH_ECACHE_STGREG_TOTALSIZE - 8; \
bne 3b; \
add scr1, 8, scr1; \
btst CH_ECACHE_STGREG_SIZE, scr3; /* done? */ \
beq 2b; \
add scr3, CH_ECACHE_STGREG_SIZE, scr3
/*
* Cheetah does not have cores so these macros are null.
*/
#define PARK_SIBLING_CORE(dcucr_reg, scr1, scr2)
#define UNPARK_SIBLING_CORE(dcucr_reg, scr1, scr2)
/*
* Cheetah gets primary AFSR and AFAR and clears the AFSR, except for the
* fatal error bits.
* datap: pointer to cpu logout structure.
* afar: returned primary AFAR value.
* scr1: scratch
* scr2: scratch
*/
#define GET_AFSR_AFAR(datap, afar, scr1, scr2) \
ldxa [%g0]ASI_AFAR, afar; \
stxa afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi; \
ldxa [%g0]ASI_AFSR, scr2; \
stxa scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR)]%asi; \
sethi %hh(C_AFSR_FATAL_ERRS), scr1; \
sllx scr1, 32, scr1; \
bclr scr1, scr2; /* Clear fatal error bits here, so */ \
stxa scr2, [%g0]ASI_AFSR; /* they're left as is in AFSR */ \
membar #Sync
/*
* The Cheetah E$ is direct-mapped, so we grab the single line's data
* and skip over the logout space reserved for additional lines.
* afar: AFAR from access.
* datap: pointer to cpu logout structure.
* scr1: scratch
* scr2: scratch
* scr3: scratch
*/
#define GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3) \
GET_ECACHE_DTAG(afar, datap, scr1, scr2, scr3); \
add datap, (CHD_EC_DATA_SETS-1)*CH_EC_DATA_SIZE, datap; \
add datap, CH_EC_DATA_SIZE * PN_L2_NWAYS, datap
/*
* Cheetah has no shadow AFAR, null operation.
*/
#define GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3)
#endif /* CHEETAH_PLUS */
/*
* Cheetah/(Cheetah+ Jaguar Panther)/Jalapeno Macro for capturing CPU
* logout data at TL>0. r_val is a register that returns the "failure count"
* to the caller, and may be used as a scratch register until the end of
* the macro. afar is used to return the primary AFAR value to the caller
* and it too can be used as a scratch register until the end. datap
* has the address within the "cpu_private" data area where the logout
* data is deposited. t_flags is a register that has the
* trap-type/trap-level/CEEN info. This t_flags register may be used after
* the GET_AFSR_AFAR macro.
*
* The CPU logout operation will fail (r_val > 0) if the logout
* structure in question is already being used. Otherwise, the CPU
* logout operation will succeed (r_val = 0). For failures, r_val
* returns the busy count (# of times we tried using this CPU logout
* structure when it was busy.)
*
* Register usage:
* %asi: Must be set to either ASI_MEM if the address in datap
* is a physical address or to ASI_N if the address in
* datap is a virtual address.
* r_val: This register is the return value which tells the
* caller whether or not the LOGOUT operation was successful.
* For failures, r_val returns the fail count (i.e. the number of
* times we have tried to use this logout structure when it was
* already being used).
* afar: output: contains AFAR on exit
* t_flags: input trap type info, may be used as scratch after it is
* stored to the cpu logout structure.
* datap: Points to the logout data area.
* scr1: Scratch
* scr2: Scratch (may be r_val)
* scr3: Scratch (may be t_flags)
*/
#define DO_TL1_CPU_LOGOUT(r_val, afar, t_flags, datap, scr1, scr2, scr3) \
setx LOGOUT_INVALID, scr2, scr1; \
ldxa [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi, scr2; \
cmp scr2, scr1; \
bne 8f; \
nop; \
stxa t_flags, [datap + CH_CLO_FLAGS]%asi; \
GET_AFSR_AFAR(datap, afar, scr1, scr2); \
add datap, CH_CLO_DATA + CH_CHD_EC_DATA, datap; \
GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3); \
GET_DCACHE_DTAG(afar, datap, scr1, scr2, scr3); \
GET_ICACHE_DTAG(afar, datap, scr1, scr2, scr3); \
sub datap, CH_CLO_DATA + CH_DIAG_DATA_SIZE, datap; \
GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3); \
ldxa [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi, afar; \
set 0, r_val; /* return value for success */ \
ba 9f; \
nop; \
8: \
ldxa [%g0]ASI_AFAR, afar; \
ldxa [datap + CH_CLO_NEST_CNT]%asi, r_val; \
inc r_val; /* return value for failure */ \
stxa r_val, [datap + CH_CLO_NEST_CNT]%asi; \
membar #Sync; \
9:
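/*
 * Hypothetical TL>0 call site (register assignments are illustrative
 * only; real handlers choose registers per the usage notes above):
 *
 *	wr	%g0, ASI_MEM, %asi	! datap holds a physical address
 *	DO_TL1_CPU_LOGOUT(%g4, %g2, %g3, %g1, %g5, %g6, %g7)
 *	brnz	%g4, 1f			! r_val != 0: structure was busy
 *	nop
 */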
/*
* Cheetah/(Cheetah+ Jaguar Panther)/Jalapeno Macro for capturing CPU
* logout data. Uses DO_TL1_CPU_LOGOUT macro defined above, and sets
* up the expected data pointer in the scr1 register and sets the %asi
* register to ASI_N for kernel virtual addresses instead of ASI_MEM as
* is used at TL>0.
*
* The CPU logout operation will fail (r_val > 0) if the logout
* structure in question is already being used. Otherwise, the CPU
* logout operation will succeed (r_val = 0). For failures, r_val
* returns the busy count (# of times we tried using this CPU logout
* structure when it was busy.)
*
* Register usage:
* r_val: This register is the return value which tells the
* caller whether or not the LOGOUT operation was successful.
* For failures, r_val returns the fail count (i.e. the number of
* times we have tried to use this logout structure when it was
* already being used).
* afar: returns AFAR, used internally as the afar value.
* output: if the cpu_private struct has not been initialized,
* then we return the t_flags value (described below) instead.
* r_or_s: input offset, either register or constant (symbol). It's
* OK for r_or_s to be a register as long as it's not scr1 or
* scr3.
* t_flags: input trap type info, may be used as scratch after it is
* stored to the cpu logout structure.
* scr1: Scratch, points to the logout data area.
* scr2: Scratch (may be r_or_s)
* scr3: Scratch (may be r_val)
* scr4: Scratch (may be t_flags)
*/
#define DO_CPU_LOGOUT(r_val, afar, r_or_s, t_flags, scr1, scr2, scr3, scr4) \
GET_CPU_PRIVATE_PTR(r_or_s, scr1, scr3, 7f); /* can't use scr2/4 */ \
wr %g0, ASI_N, %asi; \
DO_TL1_CPU_LOGOUT(r_val, afar, t_flags, scr1, scr2, scr3, scr4) \
ba 6f; \
nop; \
7: \
mov t_flags, afar; /* depends on afar = %g2 */ \
set 0, r_val; /* success in this case. */ \
6:
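/*
 * Hypothetical call site (the offset constant and registers are
 * illustrative; afar is %g2 per the "depends on afar = %g2" note):
 *
 *	DO_CPU_LOGOUT(%g3, %g2, CHPR_ASYNC_LOGOUT, %g6, %g1, %g4, %g5, %g6)
 *	brnz	%g3, 1f			! r_val != 0: logout area busy
 *	nop
 */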
/*
* The P$ is flushed as a side effect of writing to the Primary
* or Secondary Context Register. After writing to a context
* register, every line of the P$ in the Valid state is invalidated,
* regardless of which context it belongs to.
* This routine simply touches the Primary context register by
* reading the current value and writing it back. The Primary
* context is not changed.
*/
#define PCACHE_FLUSHALL(tmp1, tmp2, tmp3) \
sethi %hi(FLUSH_ADDR), tmp1 ;\
set MMU_PCONTEXT, tmp2 ;\
ldxa [tmp2]ASI_DMMU, tmp3 ;\
stxa tmp3, [tmp2]ASI_DMMU ;\
flush tmp1 /* See Cheetah PRM 8.10.2 */
/*
* Macro that flushes the entire Dcache.
*
* arg1 = dcache size
* arg2 = dcache linesize
*/
#define CH_DCACHE_FLUSHALL(arg1, arg2, tmp1) \
sub arg1, arg2, tmp1; \
1: \
stxa %g0, [tmp1]ASI_DC_TAG; \
membar #Sync; \
cmp %g0, tmp1; \
bne,pt %icc, 1b; \
sub tmp1, arg2, tmp1;
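/*
 * E.g. with an (illustrative) 64KB D$ and 32-byte lines, the loop
 * above issues 64K/32 = 2048 tag writes, walking tmp1 from
 * (size - linesize) down to 0 inclusive.
 */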
/*
* Macro that flushes the entire Icache.
*
* Note that we cannot access ASI 0x67 (ASI_IC_TAG) with the Icache on,
* because accesses to ASI 0x67 interfere with Icache coherency. We
* must make sure the Icache is off, then turn it back on after the entire
* cache has been invalidated. If the Icache is originally off, we'll just
* clear the tags but not turn the Icache on.
*
* arg1 = icache size
* arg2 = icache linesize
*/
#define CH_ICACHE_FLUSHALL(arg1, arg2, tmp1, tmp2) \
ldxa [%g0]ASI_DCU, tmp2; \
andn tmp2, DCU_IC, tmp1; \
stxa tmp1, [%g0]ASI_DCU; \
flush %g0; /* flush required after changing the IC bit */ \
sllx arg2, 1, arg2; /* arg2 = linesize * 2 */ \
sllx arg1, 1, arg1; /* arg1 = size * 2 */ \
sub arg1, arg2, arg1; \
or arg1, CH_ICTAG_LOWER, arg1; /* "write" tag */ \
1: \
stxa %g0, [arg1]ASI_IC_TAG; \
membar #Sync; /* Cheetah PRM 8.9.3 */ \
cmp arg1, CH_ICTAG_LOWER; \
bne,pt %icc, 1b; \
sub arg1, arg2, arg1; \
stxa tmp2, [%g0]ASI_DCU; \
flush %g0; /* flush required after changing the IC bit */
#if defined(JALAPENO) || defined(SERRANO)
/*
* ASI access to the L2 tag or L2 flush can hang the cpu when interacting
* with combinations of L2 snoops, victims and stores.
*
* A possible workaround is to surround each L2 ASI access with membars
* and make sure that the code is hitting in the Icache. This requires
* aligning code sequence at E$ boundary and forcing I$ fetch by
* jumping to selected offsets so that we don't take any I$ misses
* during ASI access to the L2 tag or L2 flush. This also requires
* making sure that we don't take any interrupts or traps (such as
* fast ECC trap, I$/D$ tag parity error) which can result in eviction
* of this code sequence from I$, thus causing a miss.
*
* Because of the complexity/risk, we have decided to do a partial fix
* of adding membar around each ASI access to the L2 tag or L2 flush.
*/
#define JP_EC_DIAG_ACCESS_MEMBAR \
membar #Sync
/*
* Jalapeno version of macro that flushes the entire Ecache.
*
* Uses Jalapeno displacement flush feature of ASI_EC_DIAG.
*
* arg1 = ecache size
* arg2 = ecache linesize - not modified; can be an immediate constant.
*/
#define ECACHE_FLUSHALL(arg1, arg2, tmp1, tmp2) \
CPU_INDEX(tmp1, tmp2); \
set JP_ECACHE_IDX_DISP_FLUSH, tmp2; \
sllx tmp1, JP_ECFLUSH_PORTID_SHIFT, tmp1; \
or tmp1, tmp2, tmp1; \
srlx arg1, JP_EC_TO_SET_SIZE_SHIFT, tmp2; \
1: \
subcc tmp2, arg2, tmp2; \
JP_EC_DIAG_ACCESS_MEMBAR; \
ldxa [tmp1 + tmp2]ASI_EC_DIAG, %g0; \
JP_EC_DIAG_ACCESS_MEMBAR; \
bg,pt %xcc, 1b; \
nop; \
mov 1, tmp2; \
sllx tmp2, JP_ECFLUSH_EC_WAY_SHIFT, tmp2; \
add tmp1, tmp2, tmp1; \
mov (JP_ECACHE_NWAY-1), tmp2; \
sllx tmp2, JP_ECFLUSH_EC_WAY_SHIFT, tmp2; \
andcc tmp1, tmp2, tmp2; \
bnz,pt %xcc, 1b; \
srlx arg1, JP_EC_TO_SET_SIZE_SHIFT, tmp2
#else /* JALAPENO || SERRANO */
/*
* Cheetah version of macro that flushes the entire Ecache.
*
* Need to displacement flush 2x ecache size from Ecache flush area.
*
* arg1 = ecache size
* arg2 = ecache linesize
* arg3 = ecache flush address - for cheetah only
*/
#define CH_ECACHE_FLUSHALL(arg1, arg2, arg3) \
sllx arg1, 1, arg1; \
1: \
subcc arg1, arg2, arg1; \
bg,pt %xcc, 1b; \
ldxa [arg1 + arg3]ASI_MEM, %g0;
/*
* Cheetah+ version of macro that flushes the entire Ecache.
*
* Uses the displacement flush feature.
*
* arg1 = ecache size
* arg2 = ecache linesize
* impl = CPU implementation as returned from GET_CPU_IMPL()
* The value in this register is destroyed during execution
* of the macro.
*/
#if defined(CHEETAH_PLUS)
#define CHP_ECACHE_FLUSHALL(arg1, arg2, impl) \
cmp impl, PANTHER_IMPL; \
bne %xcc, 1f; \
nop; \
set PN_L3_IDX_DISP_FLUSH, impl; \
b 2f; \
nop; \
1: \
set CHP_ECACHE_IDX_DISP_FLUSH, impl; \
2: \
subcc arg1, arg2, arg1; \
bg,pt %xcc, 2b; \
ldxa [arg1 + impl]ASI_EC_DIAG, %g0;
#else /* CHEETAH_PLUS */
#define CHP_ECACHE_FLUSHALL(arg1, arg2, impl)
#endif /* CHEETAH_PLUS */
/*
* Macro that flushes the entire Ecache.
*
* arg1 = ecache size
* arg2 = ecache linesize
* arg3 = ecache flush address - for cheetah only
*/
#define ECACHE_FLUSHALL(arg1, arg2, arg3, tmp1) \
GET_CPU_IMPL(tmp1); \
cmp tmp1, CHEETAH_IMPL; \
bne %xcc, 2f; \
nop; \
CH_ECACHE_FLUSHALL(arg1, arg2, arg3); \
ba 3f; \
nop; \
2: \
CHP_ECACHE_FLUSHALL(arg1, arg2, tmp1); \
3:
#endif /* JALAPENO || SERRANO */
/*
* Macro that flushes the Panther L2 cache.
*/
#if defined(CHEETAH_PLUS)
#define PN_L2_FLUSHALL(scr1, scr2, scr3) \
GET_CPU_IMPL(scr3); \
cmp scr3, PANTHER_IMPL; \
bne %xcc, 2f; \
nop; \
set PN_L2_SIZE, scr1; \
set PN_L2_LINESIZE, scr2; \
set PN_L2_IDX_DISP_FLUSH, scr3; \
1: \
subcc scr1, scr2, scr1; \
bg,pt %xcc, 1b; \
ldxa [scr1 + scr3]ASI_L2_TAG, %g0; \
2:
#else /* CHEETAH_PLUS */
#define PN_L2_FLUSHALL(scr1, scr2, scr3)
#endif /* CHEETAH_PLUS */
/*
* Given a VA and a page size (as encoded in ASI_MMU_TAG_ACCESS_EXT),
* this macro returns the TLB index for that mapping based on a 512 entry
* (2-way set associative) TLB. Aside from the 16 entry fully associative
* TLBs, all TLBs in Panther are 512 entry, 2-way set associative.
*
* To find the index, we shift the VA right by 13 + (3 * pg_sz) and then
* mask out all but the lower 8 bits because:
*
* ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 0 for 8K
* ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 1 for 64K
* ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 2 for 512K
* ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 3 for 4M
* ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 4 for 32M
* ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 5 for 256M
*
* and
*
* array index for 8K pages = VA[20:13]
* array index for 64K pages = VA[23:16]
* array index for 512K pages = VA[26:19]
* array index for 4M pages = VA[29:22]
* array index for 32M pages = VA[32:25]
* array index for 256M pages = VA[35:28]
*
* Inputs:
*
* va - Register.
* Input: Virtual address in which we are interested.
* Output: TLB index value.
* pg_sz - Register. Page Size of the TLB in question as encoded
* in the ASI_[D|I]MMU_TAG_ACCESS_EXT register.
*/
#if defined(CHEETAH_PLUS)
#define PN_GET_TLB_INDEX(va, pg_sz) \
srlx va, 13, va; /* first shift by 13 bits and then */ \
srlx va, pg_sz, va; /* shift by pg_sz three times. */ \
srlx va, pg_sz, va; \
srlx va, pg_sz, va; \
and va, 0xff, va; /* mask out all but the lower 8 bits */
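/*
 * Worked example (values chosen for illustration): for an 8K page,
 * pg_sz = 0, so the three pg_sz shifts are no-ops and the index is
 * VA<20:13>; e.g. va = 0x126000 gives (0x126000 >> 13) & 0xff = 0x93.
 * For a 64K page, pg_sz = 1 makes the total shift 13 + 3 = 16,
 * selecting VA<23:16> per the table above.
 */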
#endif /* CHEETAH_PLUS */
/*
* The following macros are for error traps at TL>0.
* The issue with error traps at TL>0 is that there are no safely
* available global registers. So we use the trick of generating a
* software trap, then using the %tpc, %tnpc and %tstate registers to
* temporarily save the values of %g1 and %g2.
*/
/*
* Macro to generate 8-instruction trap table entry for TL>0 trap handlers.
* Does the following steps:
* 1. membar #Sync - required for USIII family errors.
* 2. Specified software trap.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#define CH_ERR_TL1_TRAPENTRY(trapno) \
membar #Sync; \
ta trapno; \
nop; nop; nop; nop; nop; nop
/*
* Macro to generate 8-instruction trap table entry for TL>0 software trap.
* We save the values of %g1 and %g2 in %tpc, %tnpc and %tstate (since
* the low-order two bits of %tpc/%tnpc are reserved and read as zero,
* we need to put the low-order two bits of %g1 and %g2 in %tstate).
* Note that %tstate has a reserved hole from bits 3-7, so we put the
* low-order two bits of %g1 in bits 0-1 and the low-order two bits of
* %g2 in bits 10-11 (ensuring bits 8-9 are zero for use by the D$/I$
* state bits). Note that we must do a jmp instruction, since this
* is moved into the trap table entry.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#define CH_ERR_TL1_SWTRAPENTRY(label) \
wrpr %g1, %tpc; \
and %g1, 3, %g1; \
wrpr %g2, %tnpc; \
sllx %g2, CH_ERR_G2_TO_TSTATE_SHFT, %g2; \
or %g1, %g2, %g2; \
sethi %hi(label), %g1; \
jmp %g1+%lo(label); \
wrpr %g2, %tstate
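/*
 * Round-trip sketch of the packing above (unpacked again in
 * CH_ERR_TL1_ENTER):
 *	%tpc    <- %g1, %tnpc <- %g2	(their low two bits read as zero)
 *	%tstate <- (%g1 & 3) | (%g2 << CH_ERR_G2_TO_TSTATE_SHFT)
 * Only bits 0-1 and the two bits at the shift position (10-11) are
 * consulted on reconstitution, and bits 8-9 stay clear for the D$/I$
 * state, so any other bits the shifted %g2 deposits in this scratch
 * trap level's %tstate are ignored:
 *	%g1 = (%tpc & ~3)  | (%tstate & 3)
 *	%g2 = (%tnpc & ~3) | ((%tstate >> CH_ERR_G2_TO_TSTATE_SHFT) & 3)
 */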
/*
* Macro to get ptr to ch_err_tl1_data.
* reg1 will either point to a physaddr with ASI_MEM in %asi OR it
* will point to a kernel nucleus virtual address with ASI_N in %asi.
* This allows us to:
* 1. Avoid getting MMU misses. We may have gotten the original
* Fast ECC error in an MMU handler and if we get an MMU trap
* in the TL>0 handlers, we'll scribble on the MMU regs.
* 2. Allows us to use the same code in the TL>0 handlers whether
* we're accessing kernel nucleus virtual addresses or physical
* addresses.
* pseudo-code:
* reg1 <- ch_err_tl1_paddrs[CPUID];
* if (reg1 == NULL) {
* reg1 <- &ch_err_tl1_data
* %asi <- ASI_N
* } else {
* reg1 <- reg1 + offset +
* sizeof (ch_err_tl1_data) * (%tl - 3)
* %asi <- ASI_MEM
* }
*/
#define GET_CH_ERR_TL1_PTR(reg1, reg2, offset) \
CPU_INDEX(reg1, reg2); \
sllx reg1, 3, reg1; \
set ch_err_tl1_paddrs, reg2; \
ldx [reg1+reg2], reg1; \
brnz reg1, 1f; \
add reg1, offset, reg1; \
set ch_err_tl1_data, reg1; \
ba 2f; \
wr %g0, ASI_N, %asi; \
1: rdpr %tl, reg2; \
sub reg2, 3, reg2; \
mulx reg2, CH_ERR_TL1_DATA_SIZE, reg2; \
add reg1, reg2, reg1; \
wr %g0, ASI_MEM, %asi; \
2:
/*
* Macro to generate entry code for TL>0 error handlers.
* At the end of this macro, %g1 will point to the ch_err_tl1_data
* structure and %g2 will have the original flags in the ch_err_tl1_data
* structure and %g5 will have the value of %tstate where the Fast ECC
* routines will save the state of the D$ in bit CH_ERR_TSTATE_DC_ON.
* All %g registers except for %g1, %g2 and %g5 will be available after
* this macro.
* Does the following steps:
* 1. Compute physical address of per-cpu/per-tl save area using
* only %g1+%g2 (which we've saved in %tpc, %tnpc, %tstate)
* leaving address in %g1 and updating the %asi register.
* If no physical save area is available, we fall back to the
* virtual ch_err_tl1_data area with %asi set to ASI_N.
* 2. Save %g3-%g7 in save area.
* 3. Save %tpc->%g3, %tnpc->%g4, %tstate->%g5, which contain
* original %g1+%g2 values (because we're going to change %tl).
* 4. Set %tl <- %tl - 1. We do this ASAP to make the window of
* running at %tl+1 as small as possible.
* 5. Reconstitute %g1+%g2 from %tpc (%g3), %tnpc (%g4),
* %tstate (%g5) and save in save area, carefully preserving %g5
* because it has the CH_ERR_TSTATE_DC_ON value.
* 6. Load existing ch_err_tl1_data flags in %g2
* 7. Compute the new flags
* 8. If %g2 is non-zero (the structure was busy), shift the new
* flags by CH_ERR_ME_SHIFT and or them with the old flags.
* 9. Store the updated flags into ch_err_tl1_data flags.
* 10. If %g2 is zero (the structure was not busy), store the %tpc
* in ch_err_tl1_data.
*/
#define CH_ERR_TL1_ENTER(flags) \
GET_CH_ERR_TL1_PTR(%g1, %g2, CHPR_TL1_ERR_DATA); \
stxa %g3, [%g1 + CH_ERR_TL1_G3]%asi; \
stxa %g4, [%g1 + CH_ERR_TL1_G4]%asi; \
stxa %g5, [%g1 + CH_ERR_TL1_G5]%asi; \
stxa %g6, [%g1 + CH_ERR_TL1_G6]%asi; \
stxa %g7, [%g1 + CH_ERR_TL1_G7]%asi; \
rdpr %tpc, %g3; \
rdpr %tnpc, %g4; \
rdpr %tstate, %g5; \
rdpr %tl, %g6; \
sub %g6, 1, %g6; \
wrpr %g6, %tl; \
and %g5, 3, %g6; \
andn %g3, 3, %g3; \
or %g3, %g6, %g3; \
stxa %g3, [%g1 + CH_ERR_TL1_G1]%asi; \
srlx %g5, CH_ERR_G2_TO_TSTATE_SHFT, %g6; \
and %g6, 3, %g6; \
andn %g4, 3, %g4; \
or %g6, %g4, %g4; \
stxa %g4, [%g1 + CH_ERR_TL1_G2]%asi; \
ldxa [%g1 + CH_ERR_TL1_FLAGS]%asi, %g2; \
set flags | CH_ERR_TL, %g3; \
brz %g2, 9f; \
sllx %g3, CH_ERR_ME_SHIFT, %g4; \
or %g2, %g4, %g3; \
9: stxa %g3, [%g1 + CH_ERR_TL1_FLAGS]%asi; \
brnz %g2, 8f; \
rdpr %tpc, %g4; \
stxa %g4, [%g1 + CH_ERR_TL1_TPC]%asi; \
8:
/*
* Turns off D$/I$ and saves the state of DCU_DC+DCU_IC in %tstate Bits 8+9
* (CH_ERR_TSTATE_DC_ON/CH_ERR_TSTATE_IC_ON). This is invoked on Fast ECC
* at TL>0 handlers because the D$ may have corrupted data and we need to
* turn off the I$ to allow for diagnostic accesses. We then invoke
* the normal entry macro and after it is done we save the values of
* the original D$/I$ state, which is in %g5 bits CH_ERR_TSTATE_DC_ON/
* CH_ERR_TSTATE_IC_ON in ch_err_tl1_tmp.
*/
#define CH_ERR_TL1_FECC_ENTER \
ldxa [%g0]ASI_DCU, %g1; \
andn %g1, DCU_DC + DCU_IC, %g2; \
stxa %g2, [%g0]ASI_DCU; \
flush %g0; /* DCU_IC need flush */ \
rdpr %tstate, %g2; \
and %g1, DCU_DC + DCU_IC, %g1; \
sllx %g1, CH_ERR_DCU_TO_TSTATE_SHFT, %g1; \
or %g1, %g2, %g2; \
wrpr %g2, %tstate; \
CH_ERR_TL1_ENTER(CH_ERR_FECC); \
and %g5, CH_ERR_TSTATE_DC_ON + CH_ERR_TSTATE_IC_ON, %g5; \
stxa %g5, [%g1 + CH_ERR_TL1_TMP]%asi
/*
* Macro to generate exit code for TL>0 error handlers.
* We fall into this macro if we've successfully logged the error in
* the ch_err_tl1_data structure and want the PIL15 softint to pick
* it up and log it.
* Does the following steps:
* 1. Set pending flag for this cpu in ch_err_tl1_pending.
* 2. Write %set_softint with (1<<pil) to cause a pil level trap
* 3. Restore registers from ch_err_tl1_data, which is pointed to
* by %g1, last register to restore is %g1 since it's pointing
* to the save area.
* 4. Execute retry
*/
#define CH_ERR_TL1_EXIT \
CPU_INDEX(%g2, %g3); \
set ch_err_tl1_pending, %g3; \
set -1, %g4; \
stb %g4, [%g2 + %g3]; \
mov 1, %g2; \
sll %g2, PIL_15, %g2; \
wr %g2, SET_SOFTINT; \
ldxa [%g1 + CH_ERR_TL1_G7]%asi, %g7; \
ldxa [%g1 + CH_ERR_TL1_G6]%asi, %g6; \
ldxa [%g1 + CH_ERR_TL1_G5]%asi, %g5; \
ldxa [%g1 + CH_ERR_TL1_G4]%asi, %g4; \
ldxa [%g1 + CH_ERR_TL1_G3]%asi, %g3; \
ldxa [%g1 + CH_ERR_TL1_G2]%asi, %g2; \
ldxa [%g1 + CH_ERR_TL1_G1]%asi, %g1; \
retry
/*
* Generates unrecoverable error label for TL>0 handlers.
* At label (Unrecoverable error routine)
* 1. Sets flags in ch_err_tl1_data and leaves in %g2 (first
* argument to cpu_tl1_err_panic).
* 2. Call cpu_tl1_err_panic via systrap at PIL 15
*/
#define CH_ERR_TL1_PANIC_EXIT(label) \
label: ldxa [%g1 + CH_ERR_TL1_FLAGS]%asi, %g2; \
or %g2, CH_ERR_TL | CH_ERR_PANIC, %g2; \
stxa %g2, [%g1 + CH_ERR_TL1_FLAGS]%asi; \
set cpu_tl1_err_panic, %g1; \
ba sys_trap; \
mov PIL_15, %g4
/* END CSTYLED */
#endif /* _ASM */
#ifdef __cplusplus
}
#endif
#endif /* _CHEETAHASM_H */