us3_common_asm.s revision 19f938d5a97467cae5809ba3cee189b02c8e03b7
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#if !defined(lint)
#include "assym.h"
#endif /* !lint */
#include <sys/asm_linkage.h>
#include <vm/hat_sfmmu.h>
#include <sys/machparam.h>
#include <sys/machcpuvar.h>
#include <sys/machthread.h>
#include <sys/machtrap.h>
#include <sys/privregs.h>
#include <sys/cheetahregs.h>
#include <sys/us3_module.h>
#include <sys/cheetahasm.h>
#ifdef TRAPTRACE
#include <sys/traptrace.h>
#endif /* TRAPTRACE */
#if !defined(lint)
/* BEGIN CSTYLED */
nop ;\
nop ;\
/* \
* flushtype = FLUSHALL_TYPE, flush the whole thing \
* tmp3 = cache size \
* tmp1 = cache line size \
*/ \
4: \
nop ;\
/* \
* flushtype = FLUSHPAGE_TYPE \
* arg1 = pfn \
* arg2 = virtual color \
* tmp1 = cache line size \
* tmp2 = tag from cache \
* tmp3 = counter \
*/ \
2: \
4: \
5: \
nop ;\
/* \
* flushtype = FLUSHMATCH_TYPE \
* arg1 = tag to compare against \
* tmp1 = cache line size \
* tmp3 = cache size \
* arg2 = counter \
* tmp2 = cache tag \
*/ \
3: \
4: \
nop ;\
5: \
1:
/* END CSTYLED */
#endif /* !lint */
/*
* Cheetah MMU and Cache operations.
*/
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* flush page from the tlb
*
* %o0 = vaddr
* %o1 = sfmmup
*/
#ifdef DEBUG
#endif /* DEBUG */
/*
* disable ints
*/
/*
* Then, blow out the tlb
* Interrupts are disabled to prevent the primary ctx register
* from changing underneath us.
*/
/*
* For Kernel demaps use primary. type = page implicitly
*/
1:
/*
* User demap. We need to set the primary context properly.
* Secondary context cannot be used for Cheetah IMMU.
* %o0 = vaddr
* %o1 = sfmmup
* %o3 = FLUSH_ADDR
*/
#endif /* lint */
#if defined(lint)
void
vtag_flushall(void)
{}
#else /* lint */
/*
* flush the tlb
*/
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* x-trap to flush page from tlb and tsb
*
* %g1 = vaddr, zero-extended on 32-bit kernel
* %g2 = sfmmup
*
* assumes TSBE_TAG = 0
*/
/* We need to demap in the kernel context */
1:
/* We need to demap in a user context */
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
*
* %g1 = vaddr, zero-extended on 32-bit kernel
* %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
*
* NOTE: this handler relies on the fact that no
* interrupts or traps can occur during the loop
* issuing the TLB_DEMAP operations. It is assumed
* that interrupts are disabled and this code is
* fetching from the kernel locked text address.
*
* assumes TSBE_TAG = 0
*/
/* We need to demap in the kernel context */
4:
1:
/*
* We need to demap in a user context
*
* g2 = sfmmup
* g3 = pgcnt
*/
3:
#endif /* lint */
#if defined(lint)
/*ARGSUSED*/
void
{}
#else /* lint */
/*
* x-trap to flush tlb
*/
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* vac_flushpage(pfnum, color)
* Flush 1 8k page of the D-$ with physical page = pfnum
* Algorithm:
* The cheetah dcache is a 64k pseudo 4-way set-associative cache.
* It is virtual indexed, physically tagged cache.
*/
.seg ".data"
.align 8
/*
* flush page from the d$
*
* %o0 = pfnum, %o1 = color
*/
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* x-trap to flush page from the d$
*
* %g1 = pfnum, %g2 = color
*/
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* In UltraSPARC III flushcolor is the same as flushpage.
* This is because we have an ASI to flush dcache using physical
* address.
* Flushing dcache using physical address is faster because we
* don't have to deal with associativity of dcache.
* The arguments to vac_flushpage() and vac_flushcolor() are the same but
* the order is reversed. This is because we maintain compatibility
* with spitfire, in which vac_flushcolor has only one argument, namely
* vcolor.
*/
/*
* %o0 = vcolor, %o1 = pfnum
*/
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* %g1 = vcolor
* %g2 = pfnum
*/
#endif /* lint */
#if defined(lint)
int
idsr_busy(void)
{
return (0);
}
#else /* lint */
/*
* Determine whether or not the IDSR is busy.
* Entry: no arguments
* Returns: 1 if busy, 0 otherwise
*/
1:
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
/* ARGSUSED */
void
{}
#else /* lint */
.asciz "ASI_INTR_DISPATCH_STATUS error: busy"
.align 4
/*
* Setup interrupt dispatch data registers
* Entry:
* %o0 - function or inumber to call
* %o1, %o2 - arguments (2 uint64_t's)
*/
.seg "text"
#ifdef DEBUG
!
!
#endif /* DEBUG */
!
!
1:
!
!
!
!
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{ return; }
#else /* lint */
/*
*/
#endif /* lint */
#endif /* !(JALAPENO || SERRANO) */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* flush_instr_mem:
* Flush 1 page of the I-$ starting at vaddr
* %o0 vaddr
* %o1 bytes to be flushed
* UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
* the stores from all processors so that a FLUSH instruction is only needed
* to ensure pipeline is consistent. This means a single flush is sufficient at
* the end of a sequence of stores that updates the instruction stream to
* ensure correct operation.
*/
#endif /* lint */
#if defined(CPU_IMP_ECACHE_ASSOC)
#if defined(lint)
/* ARGSUSED */
get_ecache_ctrl(void)
{ return (0); }
#else /* lint */
!
!
bne 1f
b 2f
1:
2:
#endif /* lint */
#endif /* CPU_IMP_ECACHE_ASSOC */
/*
* flush_ecache:
* %o0 - 64 bit physical address
* %o1 - ecache size
* %o2 - ecache linesize
*/
#if defined(lint)
/*ARGSUSED*/
void
{}
#else /* !lint */
/*
* For certain CPU implementations, we have to flush the L2 cache
* before flushing the ecache.
*/
/*
* Flush the entire Ecache using displacement flush.
*/
#endif /* lint */
#endif /* !(JALAPENO || SERRANO) */
#if defined(lint)
void
flush_dcache(void)
{}
#else /* lint */
#endif /* lint */
#if defined(lint)
void
flush_icache(void)
{}
#else /* lint */
2:
#endif /* lint */
#if defined(lint)
/*ARGSUSED*/
void
int icache_lsize)
{
}
#else /* lint */
#endif /* lint */
#if defined(lint)
void
flush_pcache(void)
{}
#else /* lint */
#endif /* lint */
#if defined(CPU_IMP_L1_CACHE_PARITY)
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* Get dcache data and tag. The Dcache data is a pointer to a ch_dc_data_t
* structure (see cheetahregs.h):
* The Dcache *should* be turned off when this code is executed.
*/
.align 128
b 1f
.align 128
1:
2:
blt 2b
/*
* Unlike other CPUs in the family, D$ data parity bits for Panther
* do not reside in the microtag. Instead, we have to read them
* using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead
* of just having 8 parity bits to protect all 32 bytes of data
* per line, we now have 32 bits of parity.
*/
bne 4f
/*
* move our pointer to the next field where we store parity bits
* and add the offset of the last parity byte since we will be
* storing all 4 parity bytes within one 64 bit field like this:
*
* +------+------------+------------+------------+------------+
* | - | DC_parity | DC_parity | DC_parity | DC_parity |
* | - | for word 3 | for word 2 | for word 1 | for word 0 |
* +------+------------+------------+------------+------------+
* 63:32 31:24 23:16 15:8 7:0
*/
/* add the DC_data_parity bit into our working index */
3:
blt 3b
4:
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* Get icache data and tag. The data argument is a pointer to a ch_ic_data_t
* structure (see cheetahregs.h):
* The Icache *Must* be turned off when this function is called.
* This is because diagnostic accesses to the Icache interfere with cache
* consistency.
*/
.align 128
2:
blt 2b
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* Get pcache data and tags.
* inputs:
* pcache_idx - fully constructed VA for accessing P$ diagnostic
* registers. Contains PC_way and PC_addr shifted into
* the correct bit positions. See the PRM for more details.
* data - pointer to a ch_pc_data_t
* structure (see cheetahregs.h):
*/
.align 128
2:
blt 2b
#endif /* lint */
#endif /* CPU_IMP_L1_CACHE_PARITY */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* re-enable the i$, d$, w$, and p$ according to bootup cache state.
* Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
* %o0 - 64 bit constant
*/
#endif /* lint */
#if defined(lint)
get_dcu(void)
{
return ((uint64_t)0);
}
#else /* lint */
/*
* Return DCU register.
*/
#endif /* lint */
/*
*
* This handler is used to check for softints generated by error trap
* handlers to report errors. On Cheetah, this mechanism is used by the
* Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
* ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
ch_pil15_interrupt_instr(void)
{}
#else /* lint */
#endif
#if defined(lint)
void
ch_pil15_interrupt(void)
{}
#else /* lint */
/*
* Since pil_interrupt is hacked to assume that every level 15
* interrupt is generated by the CPU to indicate a performance
* counter overflow this gets ugly. Before calling pil_interrupt
* the Error at TL>0 pending status is inspected. If it is
* non-zero, then an error has occurred and it is handled.
* Otherwise control is transferred to pil_interrupt. Note that if
* an error is detected pil_interrupt will not be called and
* overflow interrupts may be lost causing erroneous performance
* measurements. However, error-recovery will have a detrimental
* effect on performance anyway.
*/
/*
* We have a pending TL>0 error, clear the TL>0 pending status.
*/
/*
* Clear the softint.
*/
/*
* For Cheetah*, call cpu_tl1_error via systrap at PIL 15
* panic flag (%g2).
*/
1:
/*
* The logout is invalid.
*
* Call the default interrupt handler.
*/
#endif
/*
* Error Handling
*
* Cheetah provides error checking for all memory access paths between
* the CPU, External Cache, Cheetah Data Switch and system bus. Error
* information is logged in the AFSR, (also AFSR_EXT for Panther) and
* AFAR and one of the following traps is generated (provided that it
* is enabled in External Cache Error Enable Register) to handle that
* error:
* 1. trap 0x70: Precise trap
* tt0_fecc for errors at trap level(TL)>=0
* 2. trap 0x0A and 0x32: Deferred trap
* async_err for errors at TL>=0
* 3. trap 0x63: Disrupting trap
* ce_err for errors at TL=0
* (Note that trap 0x63 cannot happen at trap level > 0)
*
* Trap level one handlers panic the system except for the fast ecc
* error handler which tries to recover from certain errors.
*/
/*
* FAST ECC TRAP STRATEGY:
*
* Software must handle single and multi bit errors which occur due to data
* or instruction cache reads from the external cache. A single or multi bit
* error occurring in one of these situations results in a precise trap.
*
* The basic flow of this trap handler is as follows:
*
* 1) Record the state and then turn off the Dcache and Icache. The Dcache
* is disabled because bad data could have been installed. The Icache is
* turned off because we want to capture the Icache line related to the
* AFAR.
* 3) Park sibling core if caches are shared (to avoid race condition while
* accessing shared resources such as L3 data staging register during
* CPU logout).
* 4) Read the AFAR and AFSR.
* 5) If CPU logout structure is not being used, then:
* 6) Clear all errors from the AFSR.
* 7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
* 8) Flush Ecache then Flush Dcache and Icache and restore to previous
* state.
* 9) Unpark sibling core if we parked it earlier.
* 10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
* running at PIL 15.
* 6) Otherwise, if CPU logout structure is being used:
* 7) Increment the "logout busy count".
* 8) Flush Ecache then Flush Dcache and Icache and restore to previous
* state.
* 9) Unpark sibling core if we parked it earlier.
* 10) Issue a retry since the other CPU error logging code will end up
* finding this error bit and logging information about it later.
* 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
* yet initialized such that we can't even check the logout struct, then
* we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
* call cpu_fast_ecc_error via systrap. The clo_flags parameter is used
* to determine information such as TL, TT, CEEN and NCEEN settings, etc
* in the high level trap handler since we don't have access to detailed
* logout information in cases where the cpu_private struct is not yet
* initialized.
*
* We flush the E$ and D$ here on TL=1 code to prevent getting nested
* Fast ECC traps in the TL=0 code. If we get a Fast ECC event here in
* the TL=1 code, we will go to the Fast ECC at TL>0 handler which has
* a better chance of fixing things up than simply recursing through this code
* again (this would probably cause an eventual kernel stack overflow).
* If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
* can flush the E$ (or the error is a stuck-at bit), we will recurse in
* the Fast ECC at TL>0 handler and eventually Red Mode.
*
* Note that for Cheetah (and only Cheetah), we use alias addresses for
* flushing rather than ASI accesses (which don't exist on Cheetah).
* Should we encounter a Fast ECC error within this handler on Cheetah,
* there's a good chance it's within the ecache_flushaddr buffer (since
* it's the largest piece of memory we touch in the handler); the Fast ECC
* at TL>0 handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
*/
/*
* Cheetah ecc-protected E$ trap (Trap 70) at TL=0
* tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
* architecture-specific files.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
fecc_err_instr(void)
{}
#else /* lint */
/*
* Save current DCU state. Turn off the Dcache and Icache.
*/
#endif /* lint */
#if defined(lint)
void
fast_ecc_err(void)
{}
#else /* lint */
.section ".text"
.align 64
/*
* Turn off CEEN and NCEEN.
*/
/*
* Check to see whether we need to park our sibling core
* before recording diagnostic information from caches
* which may be shared by both cores.
* We use %g1 to store information about whether or not
* we had to park the core (%g1 holds our DCUCR value and
* we only use bits from that register which are "reserved"
* to keep track of core parking) so that we know whether
* or not to unpark later. %g5 and %g4 are scratch registers.
*/
/*
* Do the CPU log out capture.
* %g3 = "failed?" return value.
* %g2 = Input = AFAR. Output the clo_flags info which is passed
* into this macro via %g4. Output only valid if cpu_private
* struct has not been initialized.
* CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
* %g4 = Trap information stored in the cpu logout flags field
* %g5 = scr1
* %g6 = scr2
* %g3 = scr3
* %g4 = scr4
*/
/* store the CEEN and NCEEN values, TL=0 */
/*
* Flush the Ecache (and L2 cache for Panther) to get the error out
* of the Ecache. If the UCC or UCU is on a dirty line, then the
* following flush will turn that into a WDC or WDU, respectively.
*/
/*
* Flush the Dcache. Since bad data could have been installed in
* the Dcache we must flush it before re-enabling it.
*/
/*
* Flush the Icache. Since we turned off the Icache to capture the
* Icache line it is now stale or corrupted and we must flush it
* before re-enabling it.
*/
6:
/*
* check to see whether we parked our sibling core at the start
* of this handler. If so, we need to unpark it here.
* We use DCUCR reserved bits (stored in %g1) to keep track of
* whether or not we need to unpark. %g5 and %g4 are scratch registers.
*/
/*
* Restore the Dcache and Icache to the previous state.
*/
/*
* Make sure our CPU logout operation was successful.
*/
be 8f
/*
* If the logout structure had been busy, how many times have
* we tried to use it and failed (nesting count)? If we have
* already recursed a substantial number of times, then we can
* assume things are not going to get better by themselves and
* so it would be best to panic.
*/
blt 7f
7:
/*
* Otherwise, if the logout structure was busy but we have not
* nested more times than our maximum value, then we simply
* issue a retry. Our TL=0 trap handler code will check and
* clear the AFSR after it is done logging what is currently
* in the logout struct and handle this event at that time.
*/
8:
/*
* Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
* already at PIL 15.
*/
#endif /* lint */
#endif /* !(JALAPENO || SERRANO) */
/*
*
* The basic flow of this trap handler is as follows:
*
* 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
* software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
* will use to save %g1 and %g2.
* 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
* we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
* handler (using the just saved %g1).
* 3) Turn off the Dcache if it was on and save the state of the Dcache
* (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
* NB: we don't turn off the Icache because bad data is not installed nor
* will we be doing any diagnostic accesses.
* %tpc, %tnpc, %tstate values previously saved).
* 6) set %tl to %tl - 1.
* 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
* 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
* 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear. For
* Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
* Save the values in ch_err_tl1_data. For Panther, read the shadow
* AFSR_EXT and save the value in ch_err_tl1_data.
* 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
* 11) Flush the Ecache.
* NB: the Ecache is flushed assuming the largest possible size with
* the smallest possible line size since access to the cpu_nodes may
* cause an unrecoverable DTLB miss.
* 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
* For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
* read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
* 14) Flush and re-enable the Dcache if it was on at step 3.
* 15) Do TRAPTRACE if enabled.
* 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
* 17) Set the event pending flag in ch_err_tl1_pending[CPU]
* 18) Cause a softint 15. The pil15_interrupt handler will inspect the
* event pending flag and call cpu_tl1_error via systrap if set.
* 19) Restore the registers from step 5 and issue retry.
*/
/*
* Cheetah ecc-protected E$ trap (Trap 70) at TL>0
* tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
* architecture-specific files. This generates a "Software Trap 0" at TL>0,
* which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
fecc_err_tl1_instr(void)
{}
#else /* lint */
#endif /* lint */
/*
* Software trap 0 at TL>0.
* tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
* the various architecture-specific files. This is used as a continuation
* of the fast ecc handling where we've bought an extra TL level, so we can
* use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
* and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
* there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
* order two bits from %g1 and %g2 respectively).
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
fecc_err_tl1_cont_instr(void)
{}
#else /* lint */
#endif /* lint */
#if defined(lint)
void
ce_err(void)
{}
#else /* lint */
/*
* The ce_err function handles disrupting trap type 0x63 at TL=0.
*
* AFSR errors bits which cause this trap are:
* CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
*
* NCEEN Bit of Cheetah External Cache Error Enable Register enables
* the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
*
* CEEN Bit of Cheetah External Cache Error Enable Register enables
* the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
*
* Cheetah+ also handles (No additional processing required):
* DUE, DTO, DBERR (NCEEN controlled)
* THCE (CEEN and ET_ECC_en controlled)
* TUE (ET_ECC_en controlled)
*
* Panther further adds:
* IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled)
* IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled)
* TUE_SH, TUE (NCEEN and L2_tag_ECC_en controlled)
* L3_TUE, L3_TUE_SH (NCEEN and ET_ECC_en controlled)
* THCE (CEEN and L2_tag_ECC_en controlled)
* L3_THCE (CEEN and ET_ECC_en controlled)
*
* Steps:
* 1. Disable hardware corrected disrupting errors only (CEEN)
* 2. Park sibling core if caches are shared (to avoid race
* condition while accessing shared resources such as L3
* data staging register during CPU logout).
* 3. If the CPU logout structure is not currently being used:
* 4. Clear AFSR error bits
* 5. Capture Ecache, Dcache and Icache lines associated
* with AFAR.
* 6. Unpark sibling core if we parked it earlier.
* 7. call cpu_disrupting_error via sys_trap at PIL 14
* unless we're already running at PIL 15.
* 4. Otherwise, if the CPU logout structure is busy:
* 5. Increment "logout busy count" and place into %g3
* 6. Unpark sibling core if we parked it earlier.
* 7. Issue a retry since the other CPU error logging
* code will end up finding this error bit and logging
* information about it later.
* 5. Alternatively (to 3 and 4 above), if the cpu_private struct is
* not yet initialized such that we can't even check the logout
* struct, then we place the clo_flags data into %g2
* (sys_trap->have_win arg #1) and call cpu_disrupting_error via
* systrap. The clo_flags parameter is used to determine information
* such as TL, TT, CEEN settings, etc in the high level trap
* handler since we don't have access to detailed logout information
* in cases where the cpu_private struct is not yet initialized.
*
* %g3: [ logout busy count ] - arg #2
* %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
*/
.align 128
/*
* Disable trap on hardware corrected errors (CEEN) while at TL=0
* to prevent recursion.
*/
/*
* Save current DCU state. Turn off Icache to allow capture of
* Icache data by DO_CPU_LOGOUT.
*/
/*
* Check to see whether we need to park our sibling core
* before recording diagnostic information from caches
* which may be shared by both cores.
* We use %g1 to store information about whether or not
* we had to park the core (%g1 holds our DCUCR value and
* we only use bits from that register which are "reserved"
* to keep track of core parking) so that we know whether
* or not to unpark later. %g5 and %g4 are scratch registers.
*/
/*
* Do the CPU log out capture.
* %g3 = "failed?" return value.
* %g2 = Input = AFAR. Output the clo_flags info which is passed
* into this macro via %g4. Output only valid if cpu_private
* struct has not been initialized.
* CHPR_CECC_LOGOUT = cpu logout structure offset input
* %g4 = Trap information stored in the cpu logout flags field
* %g5 = scr1
* %g6 = scr2
* %g3 = scr3
* %g4 = scr4
*/
/*
* Flush the Icache. Since we turned off the Icache to capture the
* Icache line it is now stale or corrupted and we must flush it
* before re-enabling it.
*/
2:
/*
* check to see whether we parked our sibling core at the start
* of this handler. If so, we need to unpark it here.
* We use DCUCR reserved bits (stored in %g1) to keep track of
* whether or not we need to unpark. %g5 and %g4 are scratch registers.
*/
/*
* Restore Icache to previous state.
*/
/*
* Make sure our CPU logout operation was successful.
*/
be 4f
/*
* If the logout structure had been busy, how many times have
* we tried to use it and failed (nesting count)? If we have
* already recursed a substantial number of times, then we can
* assume things are not going to get better by themselves and
* so it would be best to panic.
*/
blt 3f
3:
/*
* Otherwise, if the logout structure was busy but we have not
* nested more times than our maximum value, then we simply
* issue a retry. Our TL=0 trap handler code will check and
* clear the AFSR after it is done logging what is currently
* in the logout struct and handle this event at that time.
*/
4:
/*
* Call cpu_disrupting_error via systrap at PIL 14 unless we're
* already at PIL 15.
*/
#endif /* lint */
#if defined(lint)
/*
* This trap cannot happen at TL>0 which means this routine will never
* actually be called and so we treat this like a BAD TRAP panic.
*/
void
ce_err_tl1(void)
{}
#else /* lint */
.align 64
#endif /* lint */
#if defined(lint)
void
async_err(void)
{}
#else /* lint */
/*
* The async_err function handles deferred trap types 0xA
* (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
*
* AFSR errors bits which cause this trap are:
* UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
* On some platforms, EMU may cause cheetah to pull the error pin
* never giving Solaris a chance to take a trap.
*
* NCEEN Bit of Cheetah External Cache Error Enable Register enables
* the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
*
* Steps:
* 1. Disable CEEN and NCEEN errors to prevent recursive errors.
* 2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
* I$ line in DO_CPU_LOGOUT.
* 3. Park sibling core if caches are shared (to avoid race
* condition while accessing shared resources such as L3
* data staging register during CPU logout).
* 4. If the CPU logout structure is not currently being used:
* 5. Clear AFSR error bits
* 6. Capture Ecache, Dcache and Icache lines associated
* with AFAR.
* 7. Unpark sibling core if we parked it earlier.
* 8. call cpu_deferred_error via sys_trap.
* 5. Otherwise, if the CPU logout structure is busy:
* 6. Increment "logout busy count"
* 7. Unpark sibling core if we parked it earlier.
* 8) Issue a retry since the other CPU error logging
* code will end up finding this error bit and logging
* information about it later.
* 6. Alternatively (to 4 and 5 above), if the cpu_private struct is
* not yet initialized such that we can't even check the logout
* struct, then we place the clo_flags data into %g2
* (sys_trap->have_win arg #1) and call cpu_deferred_error via
* systrap. The clo_flags parameter is used to determine information
* such as TL, TT, CEEN settings, etc in the high level trap handler
* since we don't have access to detailed logout information in cases
* where the cpu_private struct is not yet initialized.
*
* %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
* %g3: [ logout busy count ] - arg #2
*/
/*
* Disable CEEN and NCEEN.
*/
/*
* Save current DCU state.
* Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
* Do this regardless of whether this is a Data Access Error or
* Instruction Access Error Trap.
* Disable Dcache for both Data Access Error and Instruction Access
* Error per Cheetah PRM P.5 Note 6.
*/
/*
* Check to see whether we need to park our sibling core
* before recording diagnostic information from caches
* which may be shared by both cores.
* We use %g1 to store information about whether or not
* we had to park the core (%g1 holds our DCUCR value and
* we only use bits from that register which are "reserved"
* to keep track of core parking) so that we know whether
* or not to unpark later. %g6 and %g4 are scratch registers.
*/
/*
* Do the CPU logout capture.
*
* %g3 = "failed?" return value.
* %g2 = Input = AFAR. Output the clo_flags info which is passed
* into this macro via %g4. Output only valid if cpu_private
* struct has not been initialized.
* CHPR_ASYNC_LOGOUT = cpu logout structure offset input
* %g4 = Trap information stored in the cpu logout flags field
* %g5 = scr1
* %g6 = scr2
* %g3 = scr3
* %g4 = scr4
*/
/*
* If the logout struct was busy, we may need to pass the
* TT, TL, and CEEN information to the TL=0 handler via
* systrap parameter so save it off here.
*/
be 1f
1:
/*
* Flush the Icache. Since we turned off the Icache to capture the
* Icache line it is now stale or corrupted and we must flush it
* before re-enabling it.
*/
2:
/*
* XXX - Don't we need to flush the Dcache before turning it back
* on to avoid stale or corrupt data? Was this broken?
*/
/*
* Flush the Dcache before turning it back on since it may now
* contain stale or corrupt data.
*/
/*
* check to see whether we parked our sibling core at the start
* of this handler. If so, we need to unpark it here.
* We use DCUCR reserved bits (stored in %g1) to keep track of
* whether or not we need to unpark. %g5 and %g7 are scratch registers.
*/
/*
* Restore Icache and Dcache to previous state.
*/
/*
* Make sure our CPU logout operation was successful.
*/
be 4f
/*
* If the logout structure had been busy, how many times have
* we tried to use it and failed (nesting count)? If we have
* already recursed a substantial number of times, then we can
* assume things are not going to get better by themselves and
* so it would be best to panic.
*/
blt 3f
3:
/*
* Otherwise, if the logout structure was busy but we have not
* nested more times than our maximum value, then we simply
* issue a retry. Our TL=0 trap handler code will check and
* clear the AFSR after it is done logging what is currently
* in the logout struct and handle this event at that time.
*/
4:
#endif /* lint */
#if defined(CPU_IMP_L1_CACHE_PARITY)
/*
* D$ parity error trap (trap 71) at TL=0.
* tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
* the various architecture-specific files. This merely sets up the
* arguments for cpu_parity_error and calls it via sys_trap.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
dcache_parity_instr(void)
{}
#else /* lint */
#endif /* lint */
/*
* D$ parity error trap (trap 71) at TL>0.
* tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
* the various architecture-specific files. This generates a "Software
* Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
* continue the handling there.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
dcache_parity_tl1_instr(void)
{}
#else /* lint */
#endif /* lint */
/*
* Software trap 1 at TL>0.
* tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
* of the various architecture-specific files. This is used as a continuation
* of the dcache parity handling where we've bought an extra TL level, so we
* can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
* and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
* there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
* order two bits from %g1 and %g2 respectively).
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
{}
#else /* lint */
#endif /* lint */
/*
* D$ parity error at TL>0 handler
* We get here via trap 71 at TL>0->Software trap 1 at TL>0. We enter
* this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
*/
#if defined(lint)
void
dcache_parity_tl1_err(void)
{}
#else /* lint */
/*
* This macro saves all the %g registers in the ch_err_tl1_data
* structure, updates the ch_err_tl1_flags and saves the %tpc in
* ch_err_tl1_tpc. At the end of this macro, %g1 will point to
* the ch_err_tl1_data structure and %g2 will have the original
* flags in the ch_err_tl1_data structure. All %g registers
* except for %g1 and %g2 will be available.
*/
#ifdef TRAPTRACE
/*
* Get current trap trace entry physical pointer.
*/
/*
* Create trap trace entry.
*/
/*
* Advance trap trace pointer.
*/
#endif /* TRAPTRACE */
/*
* I$ and D$ are automatically turned off by HW when the CPU hits
* a dcache or icache parity error so we will just leave those two
* off for now to avoid repeating this trap.
* For Panther, however, since we trap on P$ data parity errors
* and HW does not automatically disable P$, we need to disable it
* here so that we don't encounter any recursive traps when we
* issue the retry.
*/
/*
* We fall into this macro if we've successfully logged the error in
* the ch_err_tl1_data structure and want the PIL15 softint to pick
* it up and log it. %g1 must point to the ch_err_tl1_data structure.
* Restores the %g registers and issues retry.
*/
#endif /* lint */
/*
* I$ parity error trap (trap 72) at TL=0.
* tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
* the various architecture-specific files. This merely sets up the
* arguments for cpu_parity_error and calls it via sys_trap.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
icache_parity_instr(void)
{}
#else /* lint */
#endif /* lint */
/*
* I$ parity error trap (trap 72) at TL>0.
* tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
* the various architecture-specific files. This generates a "Software
* Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
* continue the handling there.
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
void
icache_parity_tl1_instr(void)
{}
#else /* lint */
#endif /* lint */
/*
* Software trap 2 at TL>0.
* tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
* of the various architecture-specific files. This is used as a continuation
* of the icache parity handling where we've bought an extra TL level, so we
* can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
* and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
* there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
* order two bits from %g1 and %g2 respectively).
* NB: Must be 8 instructions or less to fit in trap table and code must
* be relocatable.
*/
#if defined(lint)
/*
 * Lint stub for the I$ parity TL>0 continuation handler described in
 * the block comment above. The function-name line was missing here;
 * restored as icache_parity_tl1_cont_instr per that comment and the
 * parallel icache_parity_instr stub earlier in this file.
 */
void
icache_parity_tl1_cont_instr(void)
{}
#else /* lint */
#endif /* lint */
/*
* I$ parity error at TL>0 handler
* We get here via trap 72 at TL>0->Software trap 2 at TL>0. We enter
* this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
*/
#if defined(lint)
void
icache_parity_tl1_err(void)
{}
#else /* lint */
/*
* This macro saves all the %g registers in the ch_err_tl1_data
* structure, updates the ch_err_tl1_flags and saves the %tpc in
* ch_err_tl1_tpc. At the end of this macro, %g1 will point to
* the ch_err_tl1_data structure and %g2 will have the original
* flags in the ch_err_tl1_data structure. All %g registers
* except for %g1 and %g2 will be available.
*/
#ifdef TRAPTRACE
/*
* Get current trap trace entry physical pointer.
*/
/*
* Create trap trace entry.
*/
/*
* Advance trap trace pointer.
*/
#endif /* TRAPTRACE */
/*
* We fall into this macro if we've successfully logged the error in
* the ch_err_tl1_data structure and want the PIL15 softint to pick
* it up and log it. %g1 must point to the ch_err_tl1_data structure.
* Restores the %g registers and issues retry.
*/
#endif /* lint */
#endif /* CPU_IMP_L1_CACHE_PARITY */
/*
* The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
* tte, the virtual address, and the ctxnum of the specified tlb entry. They
* should only be used in places where you have no choice but to look at the
* tlb itself.
*
* Note: These two routines are required by the Estar "cpr" loadable module.
*/
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
#endif /* lint */
#if defined(lint)
get_safari_config(void)
{ return (0); }
#else /* lint */
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
#endif /* lint */
#endif /* !(JALAPENO || SERRANO) */
#if defined(lint)
void
cpu_cleartickpnt(void)
{}
#else /* lint */
/*
* Clear the NPT (non-privileged trap) bit in the %tick/%stick
* registers. In an effort to make the change in the
* counters as atomic as possible, we disable
* all interrupts while we're changing the registers. We also
* ensure that the read and write instructions are in the same
* line in the instruction cache.
*/
2:
/* read-modify-write of one counter register, clearing NPT bit */
/* (instructions elided in this view) */
1:
4:
/* read-modify-write of the other counter register, clearing NPT bit */
3:
#endif /* lint */
#if defined(CPU_IMP_L1_CACHE_PARITY)
#if defined(lint)
/*
* correct_dcache_parity(size_t size, size_t linesize)
*
* Correct D$ data parity by zeroing the data and initializing microtag
* for all indexes and all ways of the D$.
*
*/
/* ARGSUSED */
void
{}
#else /* lint */
/*
* Register Usage:
*
* %o0 = input D$ size
* %o1 = input D$ line size
* %o2 = scratch
* %o3 = scratch
* %o4 = scratch
*/
/*
* For Panther CPUs, we also need to clear the data parity bits
* using DC_data_parity bit of the ASI_DCACHE_DATA register.
*/
bne 1f ! not Panther: skip the parity-bit setup (compare and delay slot elided in this view)
1:
/*
* Set utag = way since it must be unique within an index.
*/
/*
* Zero line of D$ data (and data parity bits for Panther)
*/
2:
/*
* We also clear the parity bits if this is a panther. For non-Panther
* CPUs, we simply end up clearing the $data register twice.
*/
bge 2b ! inner loop: presumably next word within the D$ line — loop bounds elided in this view
bge 1b ! outer loop: presumably next index/way of the D$ — loop bounds elided in this view
#endif /* lint */
#endif /* CPU_IMP_L1_CACHE_PARITY */
#if defined(lint)
/*
* Get timestamp (stick).
*/
/* ARGSUSED */
void
{
}
#else /* lint */
#endif /* lint */
#if defined(lint)
/*
* Set STICK adjusted by skew.
*/
/* ARGSUSED */
void
{
}
#else /* lint */
.align 16
1: nop
#endif /* lint */
#if defined(lint)
/*
* Debugger-specific stick retrieval
*/
/*ARGSUSED*/
int
{
return (0);
}
#else /* lint */
#endif /* lint */
#if defined(lint)
/*
* Invalidate the specified line from the D$.
*
* Register usage:
* %o0 - index for the invalidation, specifies DC_way and DC_addr
*
* ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
* stored to a particular DC_way and DC_addr in ASI_DC_TAG.
*
* The format of the stored 64-bit value is:
*
* +----------+--------+----------+
* | Reserved | DC_tag | DC_valid |
* +----------+--------+----------+
* 63 31 30 1 0
*
* DC_tag is the 30-bit physical tag of the associated line.
* DC_valid is the 1-bit valid field for both the physical and snoop tags.
*
* The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
*
* +----------+--------+----------+----------+
* | Reserved | DC_way | DC_addr | Reserved |
* +----------+--------+----------+----------+
* 63 16 15 14 13 5 4 0
*
* DC_way is a 2-bit index that selects one of the 4 ways.
*
* Setting the DC_valid bit to zero for the specified DC_way and
* DC_addr index into the D$ results in an invalidation of a D$ line.
*/
/*ARGSUSED*/
void
dcache_inval_line(int index)
{
}
#else /* lint */
#endif /* lint */
#if defined(lint)
/*
* Invalidate the entire I$
*
* Register usage:
* %o0 - specifies IC_way, IC_addr, IC_tag
* %o1 - scratch
* %o2 - used to save and restore DCU value
* %o3 - scratch
* %o5 - used to save and restore PSTATE
*
* Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
* the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
* block out snoops and invalidates to the I$, causing I$ consistency
* to be broken. Before turning on the I$, all I$ lines must be invalidated.
*
* ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
* stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
* info below describes store (write) use of ASI_IC_TAG. Note that read
* use of ASI_IC_TAG behaves differently.
*
* The format of the stored 64-bit value is:
*
* +----------+--------+---------------+-----------+
* | Reserved | Valid | IC_vpred<7:0> | Undefined |
* +----------+--------+---------------+-----------+
* 63 55 54 53 46 45 0
*
* Valid is the 1-bit valid field for both the physical and snoop tags.
* IC_vpred is the 8-bit LPB bits for 8 instructions starting at
* the 32-byte boundary aligned address specified by IC_addr.
*
* The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
*
* +----------+--------+---------+--------+---------+
* | Reserved | IC_way | IC_addr | IC_tag |Reserved |
* +----------+--------+---------+--------+---------+
* 63 16 15 14 13 5 4 3 2 0
*
* IC_way is a 2-bit index that selects one of the 4 ways.
* IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
* IC_addr[5] is a "don't care" for a store.
* IC_tag set to 2 specifies that the stored value is to be interpreted
* as containing Valid and IC_vpred as described above.
*
* Setting the Valid bit to zero for the specified IC_way and
* IC_addr index into the I$ results in an invalidation of an I$ line.
*/
/*ARGSUSED*/
void
icache_inval_all(void)
{
}
#else /* lint */
2:
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{
}
#else /* lint */
/*
* cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
* crosstrap. It atomically increments the outstanding request counter and,
* if there was not already an outstanding request, branches to setsoftint_tl1
* to enqueue an intr_vec for the given inum.
*/
!
! Arguments:
!
! Internal:
!
! no need to use atomic instructions for the following
!
1:
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
{}
#else /* lint */
/*
* Get the error state for the processor.
* Note that this must not be used at TL>0
*/
#if defined(CHEETAH_PLUS)
bne,a 1f
b 2f
1:
2:
#else /* CHEETAH_PLUS */
#endif /* CHEETAH_PLUS */
#if defined(SERRANO)
/*
* We save this in the afar2 of the register save area.
*/
#endif /* SERRANO */
#endif /* lint */
#if defined(lint)
/*
* Check a page of memory for errors.
*
* Load each 64 byte block from physical memory.
* Check AFSR after each load to see if an error
* was caused.
*
* Used to determine if a page contains
* CEs when CEEN is disabled.
*/
/*ARGSUSED*/
void
{}
#else /* lint */
!
!
1:
!
!
!
!
2:
#endif /* lint */
#if defined(lint)
/*
* Perform a cpu logout called from C. This is used where we did not trap
* for the error but still want to gather "what we can". Caller must make
* sure cpu private area exists and that the indicated logout area is free
* for use, and that we are unable to migrate cpus.
*/
/*ARGSUSED*/
void
{ }
#else
#endif /* lint */
#if defined(lint)
/*ARGSUSED*/
int
{ return (0); }
#else
1:
/*
* We're about to write a block full of either total garbage
* (not kernel data, don't worry) or user floating-point data
* (so it only _looks_ like garbage).
*/
1:
0:
1:
/*
* If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
* which deals with watchpoints. Otherwise, just return -1.
*/
1:
#endif /* lint */
#ifdef CHEETAHPLUS_ERRATUM_25
#if defined(lint)
/*
* Claim a chunk of physical address space.
*/
/*ARGSUSED*/
void
{}
#else /* lint */
1:
#endif /* lint */
#if defined(lint)
/*
* CPU feature initialization,
* turn BPE off,
* get device id.
*/
/*ARGSUSED*/
void
cpu_feature_init(void)
{}
#else /* lint */
1:
!
!
#ifdef CHEETAHPLUS_ERRATUM_34
!
!
#endif /* CHEETAHPLUS_ERRATUM_34 */
#endif /* lint */
#if defined(lint)
/*
* Copy a tsb entry atomically, from src to dest.
* src must be 128 bit aligned.
*/
/*ARGSUSED*/
void
{}
#else /* lint */
#endif /* lint */
#endif /* CHEETAHPLUS_ERRATUM_25 */
#ifdef CHEETAHPLUS_ERRATUM_34
#if defined(lint)
/*ARGSUSED*/
void
itlb_erratum34_fixup(void)
{}
#else /* lint */
!
! NOTE(review): most of itlb_erratum34_fixup is elided in this view;
! the surviving code below scans the ITLB, via the diagnostic access
! ASI, for a slot into which an entry can be displaced.
!
#ifdef DEBUG
#endif /* DEBUG */
1:
2:
!
! Start the search above index 0 — presumably index 0 holds the
! entry being relocated (TODO confirm) — since we
! don't want our relocated entry evicted later.
!
! We assume an unlocked or invalid entry can be found; if not,
! we have
! bigger problems.
!
set (1 << 3), %g3
3:
ldxa [%g3]ASI_ITLB_ACCESS, %o4 ! Load TTE from t16
!
! If this entry isn't valid, we'll choose to displace it (regardless
! of the lock bit).
!
cmp %o4, %g0 ! TTE is > 0 iff not valid
bge %xcc, 4f ! If invalid, go displace
andcc %o4, TTE_LCK_INT, %g0 ! Check for lock bit
bnz,a %icc, 3b ! If locked, look at next
add %g3, (1 << 3), %g3 ! entry (each entry is 8 bytes in the diagnostic ASI)
4:
!
! Reached with %g3 = byte offset of an invalid or unlocked entry;
! the displacement stores are elided in this view.
!
#endif /* lint */
#if defined(lint)
/*ARGSUSED*/
void
dtlb_erratum34_fixup(void)
{}
#else /* lint */
!
! NOTE(review): most of dtlb_erratum34_fixup is elided in this view;
! it appears to mirror itlb_erratum34_fixup above, but for the DTLB.
!
#ifdef DEBUG
#endif /* DEBUG */
1:
2:
!
! (search for a displacement slot — code elided in this view)
!
3:
!
! (candidate-entry validity/lock checks — code elided in this view)
!
4:
!
! A displacement slot was found (offset in %g3); explicitly reload
! the former index 0 entry here.
!
set MMU_TAG_ACCESS, %g4 ! %g4 = D-MMU tag access register VA
stxa %o2, [%g4]ASI_DMMU ! write %o2 (presumably the saved tag) to tag access
stxa %o1, [%g3]ASI_DTLB_ACCESS ! write %o1 (presumably the saved TTE) into slot %g3
membar #Sync ! complete the diagnostic ASI stores before returning
retl
wrpr %g0, %o3, %pstate ! Enable interrupts (restore %pstate saved in %o3; delay slot)
SET_SIZE(dtlb_erratum34_fixup)
#endif /* lint */
#endif /* CHEETAHPLUS_ERRATUM_34 */