drmach_asm.s revision 03831d35f7499c87d51205817c93e9a8d42c4bae
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* This file is run through cpp before being used as
* an inline. It contains support routines used
* only by DR.
*/
#if defined(lint)
#include <sys/types.h>
#else
#include "assym.h"
#endif /* lint */
#include <sys/asm_linkage.h>
#include <sys/clock.h>
#include <sys/param.h>
#include <sys/privregs.h>
#include <sys/machasi.h>
#include <sys/mmu.h>
#include <sys/machthread.h>
#include <sys/pte.h>
#include <sys/stack.h>
#include <sys/vis.h>
#include <sys/cheetahregs.h>
#include <sys/cmpregs.h>
#include <sys/intreg.h>
#include <sys/cheetahasm.h>
#if defined(lint)
/*
 * Lint-only C stubs. They give lint a C-visible definition for each
 * routine implemented in assembly in the non-lint half of this file.
 * The bodies are intentionally empty or return a constant zero.
 */
/*
 * NOTE(review): the assembly implementation of drmach_shutdown_asm
 * reads five inputs (%o0 through %o4) while this stub declares three
 * parameters -- confirm against the C callers.
 */
/*ARGSUSED*/
void
drmach_shutdown_asm(uint64_t estack, uint64_t flushaddr, int size)
{}
/* Stub for the copy-rename register reprogramming routine. */
/*ARGSUSED*/
void
drmach_rename(uint64_t *script, uint_t *err, uint64_t *id)
{}
/* Stub for the label that marks the end of the drmach_rename text. */
void
drmach_rename_end(void)
{}
/* Stub for the cross-trap handler that parks a cpu during copy-rename. */
/*ARGSUSED*/
void
drmach_rename_wait(uint64_t not_used_0, uint64_t not_used_1)
{
}
/* Stub for the handler whose address signals a completed rename. */
/*ARGSUSED*/
void
drmach_rename_done(uint64_t not_used_0, uint64_t not_used_1)
{
}
/* Stub for the handler whose address signals an aborted rename. */
/*ARGSUSED*/
void
drmach_rename_abort(uint64_t not_used_0, uint64_t not_used_1)
{
}
/*
 * Stub for the Safari Configuration Register reader. The assembly
 * version does not use its argument.
 */
/*ARGSUSED*/
uint64_t
lddsafconfig(uint64_t physaddr)
{
return (0x0ull);
}
/* Stub for the block-commit-store zeroing routine. */
/* ARGSUSED */
uint32_t
drmach_bc_bzero(void *addr, size_t size)
{
return (0x0);
}
#else /* lint */
! BUS_SYNC
! Walks a zero-terminated list of 64 bit physical addresses and
! issues a discarded load (destination %g0) from each one via ASI_MEM.
! reg1: in: pointer to the first list element.
! out: pointer to the element following the zero terminator
! (the add sits in the delay slot of a non-annulled branch,
! so it also executes on the terminating iteration).
! reg2: scratch, holds the list element just read.
! Uses numeric local labels 1 and 2.
#define BUS_SYNC(reg1, reg2) \
1: ;\
ldx [reg1], reg2 ;\
brz,pn reg2, 2f ;\
add reg1, 8, reg1 ;\
ldxa [reg2]ASI_MEM, %g0 ;\
ba,a 1b ;\
nop ;\
2:
! LOAD_MB
! Reads and then clears one mailbox byte. The pointer stored in the
! global drmach_xt_mb is loaded, cpuid is added to it as a byte index,
! the byte found there is returned in mb_data, and a zero byte is
! stored back to the same location.
! cpuid: in: index into the mailbox byte array (not modified).
! mb_data: out: zero-extended mailbox byte.
! reg1: scratch, left holding the address of the mailbox byte.
#define LOAD_MB(cpuid, mb_data, reg1) \
set drmach_xt_mb, reg1 ;\
ldx [reg1], reg1 ;\
add reg1, cpuid, reg1 ;\
ldub [reg1], mb_data ;\
stub %g0, [reg1]
! LPA_MASK selects the bits of the Safari Configuration Register that
! are rewritten by the two LPA macros below. (The shutdown code in
! this file describes clearing these bits as clearing CBASE and CBND.)
#define LPA_MASK 0x7ff8
! SET_LPA
! Updates the masked LPA bits of the Safari Configuration Register
! according to a command byte.
! cmd: in: command byte, destroyed.
! bit 0x80 clear: no register access is made at all.
! bit 0x80 set, bit 0x40 set: the masked bits are written as zero.
! bit 0x80 set, bit 0x40 clear: with n = cmd & 0x1f, the value
! ((n + 1) << 9) | (n << 3) is written into the masked bits.
! reg1: scratch.
! reg2: scratch.
! The register is read, modified and written back via
! ASI_SAFARI_CONFIG and the store is followed by membar #Sync.
! Uses numeric local labels 1 and 2.
! The final line of the macro body must not end in a line
! continuation, otherwise the text that follows the macro is
! spliced into its replacement list.
#define SET_LPA(cmd, reg1, reg2) \
btst 0x80, cmd ;\
bz 2f ;\
nop ;\
btst 0x40, cmd ;\
bnz,a 1f ;\
mov %g0, cmd ;\
and cmd, 0x1f, cmd ;\
sllx cmd, 3, reg1 ;\
add cmd, 1, cmd ;\
sllx cmd, 9, cmd ;\
or cmd, reg1, cmd ;\
1: ;\
set LPA_MASK, reg2 ;\
ldxa [%g0]ASI_SAFARI_CONFIG, reg1 ;\
and cmd, reg2, cmd ;\
andn reg1, reg2, reg1 ;\
or reg1, cmd, reg1 ;\
stxa reg1, [%g0]ASI_SAFARI_CONFIG ;\
membar #Sync ;\
2:
! SET_NULL_LPA
! Clears the masked LPA bits of the Safari Configuration Register
! and leaves every other bit unchanged. The store is followed by
! membar #Sync.
! reg1: scratch, left holding the value written.
! reg2: scratch, left holding the mask.
! As above, the final line carries no line continuation.
#define SET_NULL_LPA(reg1, reg2) \
set LPA_MASK, reg2 ;\
ldxa [%g0]ASI_SAFARI_CONFIG, reg1 ;\
andn reg1, reg2, reg1 ;\
stxa reg1, [%g0]ASI_SAFARI_CONFIG ;\
membar #Sync
! ATOMIC_ADD_LONG
! Atomically adds simm to the 64 bit value stored at label.
! label: symbol naming the 64 bit memory location.
! simm: value added to the location.
! reg1: scratch, holds the address of label.
! reg2: scratch, holds the value the location is expected to contain.
! reg3: scratch, holds the new value before the compare-and-swap and
! the previous memory content after it.
! The compare-and-swap is retried, after reloading the expected
! value in the annulled delay slot, until reg2 and reg3 match.
! Uses numeric local label 1.
!
! This code is run at TL > 0, being exec'd via a cross trap.
! While running at trap level > 0, all memory accesses are
! performed using NUCLEUS context, which is always 0.
! Since the cross trap handler does not force PRIMARY context
! to be zero, the following casxa instruction must specify
! NUCLEUS ASI.
! This ASI must be specified explicitly (via casxa), rather
! than using casx. This is because of the fact that the
! default casx specifies ASI_PRIMARY, which if non-zero, can
! prevent the cpu from translating the address, leading to panic
! on bad trap following repetitive dtlb misses. This behavior
! was encountered on MCPUs when using casx instruction.
#define ATOMIC_ADD_LONG(label, simm, reg1, reg2, reg3) \
set label, reg1 ;\
ldx [reg1], reg2 ;\
1: ;\
add reg2, simm, reg3 ;\
casxa [reg1]ASI_N, reg2, reg3 ;\
cmp reg2, reg3 ;\
bne,a,pn %xcc, 1b ;\
ldx [reg1], reg2
! HERE
! Debug progress marker: reads the %tick register and stores the
! 64 bit value at [reg1 + simm].
! reg1: in: base address of the data area (not modified).
! simm: immediate byte offset of the slot to stamp.
! reg2: scratch, left holding the %tick value stored.
#define HERE(reg1, simm, reg2) \
rdpr %tick, reg2 ;\
stx reg2, [reg1 + simm]
!
! Returns processor icache size and linesize in reg1 and
! reg2, respectively.
!
! Panther has a larger icache compared to Cheetahplus and
! Jaguar.
!
! The cpu implementation is obtained with the GET_CPU_IMPL macro
! and compared against PANTHER_IMPL. A match selects the PN_ICACHE
! constants; any other implementation selects the CH_ICACHE
! constants. reg1 is used as scratch for the implementation number
! before it receives the size. Uses numeric local labels 1 and 2.
!
#define GET_ICACHE_PARAMS(reg1, reg2) \
GET_CPU_IMPL(reg1) ;\
cmp reg1, PANTHER_IMPL ;\
bne %xcc, 1f ;\
nop ;\
set PN_ICACHE_SIZE, reg1 ;\
set PN_ICACHE_LSIZE, reg2 ;\
ba 2f ;\
nop ;\
1: ;\
set CH_ICACHE_SIZE, reg1 ;\
set CH_ICACHE_LSIZE, reg2 ;\
2:
#define DRMACH_MCU_IDLE_READS 3
! Macro to check if a Panther MC is idle. The EMU Activity
! Status register is first read to clear the MCU status bit.
! The MCU status is then read and checked repeatedly to verify
! the MCU is indeed idle. A single non-idle status
! will fail the idle check. This could be made more lenient
! by adding a retry loop.
! NOTE(review): the counter starts at zero and the loop continues
! while it is less than or equal to DRMACH_MCU_IDLE_READS (ble), so
! the status is actually checked DRMACH_MCU_IDLE_READS + 1 times
! after the initial clearing read. Confirm whether the extra read
! is intended.
! addr: Panther EMU Activity Status register read address.
! Assumed to be 0x18 for local ASI access or else
! FIREPLANE_ADDRESS_REG + 0x400050 for PIO access.
! 0 is returned in this register if MCU is idle and
! queues are empty. Otherwise, -1 is returned in this
! register.
! asi: Immediate asi value. Assumed to be ASI_SAFARI_CONFIG
! for local ASI or ASI_IO for PIO access.
! scr1: Scratch (holds the most recent status value read)
! scr2: Scratch (read counter)
! Both the status read and the store of -1 sit in annulled delay
! slots, so each executes only when its branch is taken.
! Uses numeric local labels 0, 1 and 2.
!
#define CHECK_MCU_IDLE(addr, asi, scr1, scr2) \
ldxa [addr]asi, %g0 ;\
ba 1f ;\
clr scr2 ;\
0: ;\
btst MCU_ACT_STATUS, scr1 ;\
bne,a 2f ;\
sub %g0, 1, addr ;\
inc scr2 ;\
1: ;\
cmp scr2, DRMACH_MCU_IDLE_READS ;\
ble,a 0b ;\
ldxa [addr]asi, scr1 ;\
clr addr ;\
2:
! drmach_shutdown_asm
!
! inputs:
! %o0 = stack pointer
! %o1 = ecache flush address (ignored if cheetah+ processor)
! %o2 = ecache size
! %o3 = ecache line size
! %o4 = phys addr of byte to clear when finished
!
! output:
! Stores a zero at [%o4]ASI_MEM when the processor
! is ready to be removed from domain coherency.
!
! This routine never returns: after signalling it spins forever,
! refreshing a debug tick stamp on every pass.
!
! Data area layout (byte offsets from %g6):
! 0 stack pointer passed in %o0
! 8 dcache_size 12 dcache_linesize
! 16 icache_size 20 icache_linesize
! 24 E$ flush PA 32 E$ size 36 E$ linesize
! 40 phys addr of the signal byte
! 48 relocated %sp (debug)
! 128 tick at end of initialization (debug)
! 136 tick at end of preparation (debug)
! 144 tick of the most recent spin iteration (debug)
!
ENTRY_NP(drmach_shutdown_asm)
membar #LoadStore ! parsley.
! Calculate pointer to data area. Determine size of
! drmach_shutdown_asm, add to base address and align
! to next 16 byte boundary. Leave result in %g6.
! The base address is the value stored in drmach_cpu_sram_va.
set drmach_shutdown_asm_end, %g6
set drmach_shutdown_asm, %g1
set drmach_cpu_sram_va, %g2
ldx [%g2], %g2
sub %g6, %g1, %g6
add %g6, %g2, %g6
add %g6, 15, %g6
andn %g6, 15, %g6
! Save parameters
stx %o0, [%g6 + 0] ! save stack pointer
stx %o1, [%g6 + 24] ! save E$ flush PA
st %o2, [%g6 + 32] ! save E$ size
st %o3, [%g6 + 36] ! save E$ linesize
stx %o4, [%g6 + 40] ! save phys addr of signal byte
set dcache_size, %g1
ld [%g1], %g1
st %g1, [%g6 + 8] ! save dcache_size
set dcache_linesize, %g1
ld [%g1], %g1
st %g1, [%g6 + 12] ! save dcache_linesize
GET_ICACHE_PARAMS(%g1, %g2)
st %g1, [%g6 + 16] ! save icache_size
st %g2, [%g6 + 20] ! save icache_linesize
! Flushes all active windows except the current one.
! Can cause spill traps to occur.
flushw
! Make sure all asynchronous processing is complete.
! Note: has no implications on pending bus transactions.
membar #Sync
! Move stack. Algorithm copied from t0stacktop setup of
! %sp in sun4u/ml/locore.s
! Replaces SWITCH_STACK() macro used in Starfire DR.
! The saved stack pointer is lowered by SA(KFPUSIZE+GSR_SIZE),
! rounded down to a 64 byte boundary, then lowered again by
! SA(MPCBSIZE) + STACK_BIAS.
ldx [%g6 + 0], %g1
sub %g1, SA(KFPUSIZE+GSR_SIZE), %g2
and %g2, 0x3f, %g3
sub %g2, %g3, %o2
sub %o2, SA(MPCBSIZE) + STACK_BIAS, %sp
stx %sp, [%g6 + 48] ! for debug
HERE(%g6, 128, %g1) ! initialization complete (for debug)
! Panther needs to flush the L2 cache before the L3
! cache is flushed by the ecache flushall macro.
PN_L2_FLUSHALL(%g1, %g2, %g3)
! Flush E$. The purpose of this flush is to rid the E$ of
! lines in states O or Os. Implicitly flushes W$.
ldx [%g6 + 24], %g1 ! *ecache_flushaddr
ld [%g6 + 32], %g2 ! ecache_size
ld [%g6 + 36], %g3 ! ecache_linesize
ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
! Since the bus sync list read below does not guarantee
! transaction completion on Panther domains, as an
! optimization Panther skips the read and subsequent
! E$ flush.
GET_CPU_IMPL(%g1)
cmp %g1, PANTHER_IMPL
be %xcc, drmach_shutdown_ecache_flushed
nop
!
! Ensure all outstanding writebacks have retired. Following this
! sync, all writes must be strictly managed.
!
set drmach_bus_sync_list, %g1
BUS_SYNC(%g1, %g2)
! Flush E$ again to victimize references to drmach_bus_sync_list.
ldx [%g6 + 24], %g1 ! *ecache_flushaddr
ld [%g6 + 32], %g2 ! ecache_size
ld [%g6 + 36], %g3 ! ecache_linesize
ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
drmach_shutdown_ecache_flushed:
ld [%g6 + 8], %g1 ! flush dcache
ld [%g6 + 12], %g2
CH_DCACHE_FLUSHALL(%g1, %g2, %g3)
ld [%g6 + 16], %g1 ! flush icache
ld [%g6 + 20], %g2
CH_ICACHE_FLUSHALL(%g1, %g2, %g3, %g4)
PCACHE_FLUSHALL(%g1, %g2, %g3) ! flush pcache (no parameters)
!
! Flush all unlocked dtlb and itlb entries.
! Replaces TLB_FLUSH_UNLOCKED macro used in Starfire DR.
!
sethi %hi(FLUSH_ADDR), %g1
set DEMAP_ALL_TYPE, %g2
stxa %g0, [%g2]ASI_DTLB_DEMAP
stxa %g0, [%g2]ASI_ITLB_DEMAP
flush %g1
!
! Zero LPA by clearing CBASE and CBND. Following
! this, all transactions to cachable address space
! will be of the remote flavor.
!
SET_NULL_LPA(%g1, %g2)
HERE(%g6, 136, %g1) ! preparation complete (for debug)
!
! Clear byte to signal finished.
! NOTE: This store will allocate in the E$. It is
! vitally important that this line is demoted to
! state I before removing this processor from the
! coherency. The demotion is ensured by a synchronous
! "steal back" that takes place in drmach_cpu_poweroff.
ldx [%g6 + 40], %g1
stba %g0, [%g1]ASI_MEM
! Spin forever; only the debug tick stamp at offset 144 changes.
5:
HERE(%g6, 144, %g1) ! spin indicator (for debug)
ba 5b
nop
.asciz "drmach_shutdown_asm" ! for debug
.align 4
! End-of-text label; used above to compute the size of this routine.
.global drmach_shutdown_asm_end
drmach_shutdown_asm_end:
SET_SIZE(drmach_shutdown_asm)
! lddsafconfig
!
! input:
! nothing
!
! output:
! %o0 content of this processor's SCR
!
! Returns current value of this processor's Safari
! Configuration Register.
!
! Leaf routine: the register is read via ASI_SAFARI_CONFIG in the
! delay slot of the return, directly into %o0.
!
ENTRY(lddsafconfig)
retl
ldxa [%g0]ASI_SAFARI_CONFIG, %o0
SET_SIZE(lddsafconfig)
! drmach_rename
!
! input:
! %o0 pointer to register address/value compound list
! %o1 address for setting error code if rename did not
! complete. Unmodified if no error.
! %o2 address for returning opaque memory controller id
! in case of error. Unmodified if no error.
! Global drmach_xt_mb[cpuid] is expected to be the new LPA.
!
! output:
! [%o1] = 1 if failed to idle memory controller, otherwise unmodified.
! [%o2] = id of failed memory controller, otherwise unmodified.
!
! Perform HW register reprogramming. This is the "rename" step for
! the copy-rename process. drmach_rename is copied to a cpu's sram
! followed by register address/value pairs -- the text and data are
! sourced from the sram while drmach_rename is executed.
!
! The parameter is assumed to point to a concatenation of six
! zero-terminated lists located in non-cachable storage. The assumed
! format (and purpose) of each list is as follows:
!
! 1) a copy of drmach_bus_sync_list. A list of PA for each
! active memory bank in the domain. Used to infer
! the completion of all pending coherent transactions
! initiated by this processor. Assumes MC work queue
! does not implement read bypass. This is true of Cheetah,
! Cheetah+, and Jaguar processors. Panther does support
! read bypass, so for Panther MCs with read-bypass-write
! enabled, the read is issued but it does not guarantee
! completion of outstanding writes in the MC queue.
! 2) address/id pair for the local Panther EMU Activity Status
! Register of this processor. The register address is assumed
! to be a VA which is polled via ASI_SAFARI_CONFIG until the
! MC queues are empty. The id is an opaque identifier which
! must be returned along with an error code if the MCU status
! does not go idle. See the parameter description above.
! This section will be empty if this processor is not a Panther.
! Both the address and id are assumed to be 64 bit values.
! 3) address/id pairs for non-local Panther EMU Activity Status
! Registers on other source and target processors. The register
! address is assumed to be a PIO address which is polled via
! ASI_IO to drain/idle the MCs on other Panther procs. The
! id is an opaque identifier which must be returned along with
! an error code if a MC fails to go idle. This section will
! be empty if there are no non-local Panther processors on the
! source and target expanders. Both the address and id are
! assumed to be 64 bit values.
! 4) address/value pairs for the Memory Address Decoder
! register of this processor. The register address is
! assumed to be a VA within ASM_MC_DECODE space. The
! address and value elements are assumed to be 64 bit values.
! 5) address/value pairs for any 64 bit register accessible
! via ASI_IO. The address and value fields are assumed to
! be 64 bit values.
! This list is typically used for reprogramming the Memory
! Address Decoder Register of other cpus and for reprogram-
! ming the Safari Configuration Register of I/O controllers.
! 6) address/value pairs for any 32 bit register accessible
! via ASI_IO. The address element is assumed to be a 64 bit
! value. The value element is assumed to be a 64 bit word
! containing a 32 bit value in the lower half.
! This list typically contains address/value pairs for
! AXQ CASM tables.
!
! Register usage:
! %o0 = cursor into the compound list
! %o1, %o2, %o3 = scratch
! %o4 = saved error code address, %o5 = saved error id address
!
ENTRY_NP(drmach_rename)
mov %o1, %o4 ! save error code address
mov %o2, %o5 ! save error id address
BUS_SYNC(%o0, %o1) ! run section 1
SET_NULL_LPA(%o1, %o2) ! prep for cachable transactions
! after rename completes.
! e.g.: the load_mb that occurs below
! Sections 2 and 3: each element is an address/id pair. The cursor
! advances by 16 in an annulled delay slot only when the MC was found
! idle, so on the bailout path %o0 still points at the failing pair
! and its id is read from [%o0 + 8].
3:
ldx [%o0], %o1 ! run section 2
brz,a,pn %o1, 4f
add %o0, 8, %o0 ! skip section 2 terminator
CHECK_MCU_IDLE(%o1, ASI_SAFARI_CONFIG, %o2, %o3)
cmp %o1, 0 ! idled?
be,a 3b ! ok, advance
add %o0, 16, %o0
mov 1, %o1 ! not idle, bailout
stw %o1, [%o4] ! set MC idle error code
ldx [%o0 + 8], %o1
stx %o1, [%o5] ! set MC idle error id
retl
nop
4:
ldx [%o0], %o1 ! run section 3
brz,a,pn %o1, 5f
add %o0, 8, %o0 ! skip section 3 terminator
CHECK_MCU_IDLE(%o1, ASI_IO, %o2, %o3)
cmp %o1, 0 ! idled?
be,a 4b ! ok, advance
add %o0, 16, %o0
mov 1, %o1 ! not idle, bailout
stw %o1, [%o4] ! set MC idle error code
ldx [%o0 + 8], %o1
stx %o1, [%o5] ! set MC idle error id
retl
nop
! Sections 4 through 6: each element is an address/value pair. The
! value is stored to the address and then read back and discarded.
5:
ldx [%o0], %o1 ! run section 4
brz,a,pn %o1, 6f
add %o0, 8, %o0 ! skip section 4 terminator
ldx [%o0 + 8], %o2
stxa %o2, [%o1]ASI_MC_DECODE
membar #Sync
ldxa [%o1]ASI_MC_DECODE, %g0 ! read back to insure written
b 5b
add %o0, 16, %o0
6:
ldx [%o0], %o1 ! run section 5
brz,a,pn %o1, 7f
add %o0, 8, %o0 ! skip section 5 terminator
ldx [%o0 + 8], %o2
stxa %o2, [%o1]ASI_IO
ldxa [%o1]ASI_IO, %g0 ! read back to insure written
b 6b
add %o0, 16, %o0
! Section 6 is the last list, so its terminator is not skipped.
7:
ldx [%o0], %o1 ! run section 6
brz,a,pn %o1, 8f
nop
ldx [%o0 + 8], %o2
stwa %o2, [%o1]ASI_IO
lduwa [%o1]ASI_IO, %g0 ! read back to insure written
b 7b
add %o0, 16, %o0
! All lists processed: fetch (and clear) this cpu's mailbox byte and
! apply it as the new LPA setting.
8:
CPU_INDEX(%o0, %o1)
LOAD_MB(%o0, %o1, %o2)
SET_LPA(%o1, %o0, %o2)
retl
nop
.asciz "drmach_rename" ! for debug
.align 4
SET_SIZE(drmach_rename)
! End-of-text label for drmach_rename.
.global drmach_rename_end
drmach_rename_end:
! drmach_rename_wait
!
! input:
! nothing
!
! output:
! nothing
!
! drmach_rename_wait is a cross-trap function used to move a
! cpu's execution out of coherent space while a copy-rename
! operation is in progress.
!
! In each CPU SRAM exists an area (16KB on Cheetah+ boards,
! 32KB on Jaguar/Panther boards) reserved for DR. This area is
! logically divided by DR into 8KB pages, one page per CPU (or
! core) in a port pair. (Two Safari ports share HW resources on
! a CPU/MEM board. These are referred to as a port pair.)
!
! This routine begins by mapping the appropriate SRAM page,
! transferring the machine code (between the labels
! drmach_rename_wait_asm and drmach_rename_wait_asm_end), then
! jumping to SRAM. After returning from SRAM, the page is
! demapped before the cross-call is exited (sic).
!
! The machine code flushes all caches, waits for a special
! interrupt vector, then updates the processor's LPA and
! resynchronizes caches with the new home memory.
!
! The special interrupt vector is assumed to be a cross-call to
! drmach_rename_done sent by the master processor upon completing
! the copy-rename operation. The interrupt is received and discarded;
! The cross-call to drmach_rename_done is never executed. Instead
! the Interrupt Receive Status Register is employed, temporarily,
! as a semaphore. This avoids unwanted bus traffic during the critical
! rename operation.
!
ENTRY_NP(drmach_rename_wait)
CPU_INDEX(%g5, %g1) ! put cpuid in %g5
!
! sfmmu_dtlb_ld(drmach_cpu_sram_va,
! KCONTEXT, drmach_cpu_sram_tte[cpuid]);
! sfmmu_itlb_ld(drmach_cpu_sram_va,
! KCONTEXT, drmach_cpu_sram_tte[cpuid]);
!
! %g3 = drmach_cpu_sram_tte[cpuid] (8 byte entries)
! %g1 = value of drmach_cpu_sram_va, %g2 = tag (va | KCONTEXT)
! If the ctx_pgsz_array pointer is non-null, its KCONTEXT byte is
! shifted by TAGACCEXT_SHIFT and written to MMU_TAG_ACCESS_EXT first.
set drmach_cpu_sram_tte, %g1
sllx %g5, 3, %g2
ldx [%g1 + %g2], %g3
set drmach_cpu_sram_va, %g1
ldx [%g1], %g1
or %g1, KCONTEXT, %g2 ! preserve %g1
set MMU_TAG_ACCESS, %g4
sethi %hi(ctx_pgsz_array), %g6
ldn [%g6 + %lo(ctx_pgsz_array)], %g6
brz %g6, 1f
nop
ldub [%g6 + KCONTEXT], %g6
sll %g6, TAGACCEXT_SHIFT, %g6
set MMU_TAG_ACCESS_EXT, %g7
stxa %g6, [%g7]ASI_DMMU
1:
stxa %g2, [%g4]ASI_DMMU
stxa %g3, [%g0]ASI_DTLB_IN
membar #Sync
sethi %hi(FLUSH_ADDR), %g6
stxa %g2, [%g4]ASI_IMMU
stxa %g3, [%g0]ASI_ITLB_IN
flush %g6
!
! copy drmach_rename_wait_asm block to SRAM. Preserve entry
! point in %g1. After the code has been copied, align %g6
! (the destination pointer) to the next highest 16 byte
! boundary. This will define the start of the data area.
!
! The copy moves one 32 bit word per iteration; the destination
! pointer is advanced in the delay slot of the loop branch.
mov %g1, %g6
set drmach_rename_wait_asm, %g2
set drmach_rename_wait_asm_end, %g3
0:
lduw [%g2], %g4 ! do copy
stw %g4, [%g6]
add %g2, 4, %g2
cmp %g2, %g3
bne 0b
add %g6, 4, %g6
add %g6, 15, %g6 ! locate data area on next 16 byte
andn %g6, 15, %g6 ! boundary following text
! WARNING: no bounds checking
! %g7 receives the address of the jmpl itself; the copied code
! returns to %g7+8, the instruction after the delay slot below.
jmpl %g1, %g7 ! jump to code in cpu sram
nop
set drmach_cpu_sram_va, %g1 ! vtab_flushpage_tl1(drmach_cpu_sram_va,
ldx [%g1], %g1 ! KCONTEXT);
set KCONTEXT, %g2
set MMU_PCONTEXT, %g4
or %g1, DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1
ldxa [%g4]ASI_DMMU, %g5 /* rd old ctxnum */
stxa %g2, [%g4]ASI_DMMU /* wr new ctxnum */
stxa %g0, [%g1]ASI_DTLB_DEMAP
stxa %g0, [%g1]ASI_ITLB_DEMAP
stxa %g5, [%g4]ASI_DMMU /* restore old ctxnum */
retry
drmach_rename_wait_asm:
! the following code is copied to a cpu's sram and executed
! from there.
! Input:
! %g5 is cpuid
! %g6 is data area (follows text)
! %g7 is link address back to caller
!
! Data area layout (byte offsets from %g6):
! 4 cpuid (debug)
! 8 dcache_size 12 dcache_linesize
! 16 icache_size 20 icache_linesize
! 24 E$ flush address (value of drmach_iocage_paddr)
! 32 ecache_size 36 ecache_linesize
! 40 mailbox byte
! 64 interrupt receive status
! 72, 80, 88 interrupt receive data 0, 1, 2
! 128, 136, 144, 152 tick stamps (debug)
!
st %g5, [%g6 + 4] ! save cpuid (for debug)
set dcache_size, %g1
ld [%g1], %g1
st %g1, [%g6 + 8] ! save dcache_size
set dcache_linesize, %g1
ld [%g1], %g1
st %g1, [%g6 + 12] ! save dcache_linesize
GET_ICACHE_PARAMS(%g1, %g2)
st %g1, [%g6 + 16] ! save icache_size
st %g2, [%g6 + 20] ! save icache_linesize
set drmach_iocage_paddr, %g1
ldx [%g1], %g1
stx %g1, [%g6 + 24] ! save *ecache_flushaddr
mulx %g5, CPU_NODE_SIZE, %g1 ! %g4 = &cpunodes[cpuid]
set cpunodes, %g4
add %g4, %g1, %g4
ld [%g4 + ECACHE_SIZE], %g1
st %g1, [%g6 + 32] ! save ecache_size
ld [%g4 + ECACHE_LINESIZE], %g1
st %g1, [%g6 + 36] ! save ecache_linesize
LOAD_MB(%g5, %g1, %g2) ! save mailbox data
stb %g1, [%g6 + 40]
membar #Sync ! Complete any pending processing.
! Flush E$. The purpose of this flush is to rid the E$ of
! lines in states O or Os. Implicitly flushes W$.
! NOTE: Reading the bus sync list and r/w ops on drmach_xt_ready
! will disturb the E$. The lines of the bus sync list will be
! in state S. The line containing drmach_xt_ready will be in
! state O. Before proceeding with the copy-rename, the master
! processor will "steal back" the drmach_xt_ready (sic) line.
! This will demote the state of the line in E$ to I.
! However, the lines containing the bus sync list must be
! victimized before returning to the OS. This is vital because
! following copy-rename the corresponding lines in the new home
! memory will be in state gM. The resulting S,gM state pair is
! invalid and does represent a loss of coherency. Flushing the
! E$ after the bus sync list is read will be sufficient to
! avoid the invalid condition.
!
! For Panther, there is redundancy as both cores flush the shared
! L2 and L3 caches. As an optimization, only one core could do the
! flush of the shared caches, however care must be taken that the
! sibling core does not install owned lines once the flush begins.
PN_L2_FLUSHALL(%g1, %g2, %g3)
ldx [%g6 + 24], %g1 ! *ecache_flushaddr
ld [%g6 + 32], %g2 ! ecache_size
ld [%g6 + 36], %g3 ! ecache_linesize
ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
! Make sure all outstanding transactions for this processor
! have retired. See E$ note above.
set drmach_bus_sync_list, %g1
BUS_SYNC(%g1, %g2)
HERE(%g6, 128, %g4) ! preparation complete (for debug)
! Signal this processor is ready for rename operation to begin.
! See E$ note above.
ATOMIC_ADD_LONG(drmach_xt_ready, 1, %g2, %g3, %g4)
! Loop on IRSR waiting for interrupt. The expected interrupt
! is a cross-trap to drmach_rename_done. It is sent by the master
! processor when the copy-rename operation is complete. The
! received cross-trap is used only as a signal. It is not executed.
2:
HERE(%g6, 136, %g4) ! last poll tick (for debug)
ldxa [%g0]ASI_INTR_RECEIVE_STATUS, %g4 ! wait for xt
btst IRSR_BUSY, %g4
bz 2b
nop
stx %g4, [%g6 + 64] ! save status and payload
set IRDR_0, %g2
ldxa [%g2]ASI_INTR_RECEIVE, %g2
stx %g2, [%g6 + 72]
set IRDR_1, %g2
ldxa [%g2]ASI_INTR_RECEIVE, %g2
stx %g2, [%g6 + 80]
set IRDR_2, %g2
ldxa [%g2]ASI_INTR_RECEIVE, %g2
stx %g2, [%g6 + 88]
! clear rcv status
stxa %g0, [%g0]ASI_INTR_RECEIVE_STATUS
membar #Sync
HERE(%g6, 144, %g4) ! signal rcvd tick (for debug)
! Check for copy-rename abort signal. If this signal is received,
! the LPA change is skipped since the rename step was not done.
! The cache flushes are still done as paranoia.
! The signal is identified by comparing the first interrupt data
! word (saved at offset 72) with the handler addresses.
set drmach_rename_abort, %g1
ldx [%g6 + 72], %g2
cmp %g1, %g2
be 3f
nop
! Resume waiting if this is not drmach_rename_done.
set drmach_rename_done, %g1
cmp %g1, %g2
bne 2b
nop
ldub [%g6 + 40], %g1 ! get saved mailbox data
SET_LPA(%g1, %g2, %g3) ! set LPA as indicated by the mb data
3:
! Flush all caches (E, D, I and P) to ensure each is resynchronized
! with the corresponding states in the new home memory. (W$ is
! implicitly flushed when the E$ is flushed.)
!
! Panther needs to flush the L2 cache before the L3
! cache is flushed by the ecache flushall macro.
PN_L2_FLUSHALL(%g1, %g2, %g3)
ldx [%g6 + 24], %g1 ! *ecache_flushaddr
ld [%g6 + 32], %g2 ! ecache_size
ld [%g6 + 36], %g3 ! ecache_linesize
ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
ld [%g6 + 8], %g1 ! flush dcache
ld [%g6 + 12], %g2
CH_DCACHE_FLUSHALL(%g1, %g2, %g3)
ld [%g6 + 16], %g1 ! flush icache
ld [%g6 + 20], %g2
CH_ICACHE_FLUSHALL(%g1, %g2, %g3, %g4)
PCACHE_FLUSHALL(%g1, %g2, %g3) ! flush pcache (no parameters)
HERE(%g6, 152, %g4) ! done tick (for debug)
! Return to the instruction following the delay slot of the jmpl
! that transferred control here.
jmpl %g7+8, %g0
nop
.asciz "drmach_rename_wait" ! for debug
.align 4
drmach_rename_wait_asm_end:
SET_SIZE(drmach_rename_wait)
! drmach_rename_done
!
! input:
! nothing
!
! output:
! nothing
!
! Used as signal data. See drmach_rename_wait.
! drmach_rename_wait compares the first received interrupt data
! word with the address of this routine to recognize a completed
! rename. The body itself does nothing but exit the trap.
!
ENTRY_NP(drmach_rename_done)
retry
SET_SIZE(drmach_rename_done)
! drmach_rename_abort
!
! input:
! nothing
!
! output:
! nothing
!
! Used as signal data. See drmach_rename_wait.
! drmach_rename_wait compares the first received interrupt data
! word with the address of this routine to recognize an aborted
! rename, in which case the LPA update is skipped. The body itself
! does nothing but exit the trap.
!
ENTRY_NP(drmach_rename_abort)
retry
SET_SIZE(drmach_rename_abort)
! drmach_set_lpa
!
! input:
! Globals: drmach_xt_mb[cpuid] contains new LPA data
!
! output:
! nothing
!
! Sets the executing processor's LPA as indicated by the command
! stored in drmach_xt_mb, a byte array indexed by cpuid. Assumes
! the caller is preventing illegal LPA settings and transitions.
!
! Side effects: the mailbox byte is cleared when it is read, and
! drmach_xt_ready is atomically incremented by one before the trap
! is exited with retry. Uses %g1, %g2, %g3 and %g5 as scratch.
!
ENTRY_NP(drmach_set_lpa)
!
! Set %g1 to this processor's cpuid.
!
CPU_INDEX(%g1, %g2)
!
! Get LPA message from mailbox, leave in %g5.
!
LOAD_MB(%g1, %g5, %g2)
!
! Set LPA, mailbox data in %g5.
!
SET_LPA(%g5, %g1, %g2)
!
! Signal work is done.
!
ATOMIC_ADD_LONG(drmach_xt_ready, 1, %g1, %g2, %g3)
retry
SET_SIZE(drmach_set_lpa)
!
! drmach_bc_bzero
!
! inputs:
! %o0 = base vaddr of area to clear (must be 64-byte aligned)
! %o1 = size of area to clear (must be multiple of 256 bytes)
!
! outputs:
! %o0 =
! 0 (success)
! 1 (size too small or not modulo 256)
! 2 (vaddr not 64-byte aligned)
!
! Zero a block of storage using block commit stores.
! Nonzero return if caller's address or size are not
! block aligned.
!
! Register usage:
! %g1 = saved %fprs
! %g3 = saved %pstate, %g4 = scratch
! %o3 = bytes cleared per loop iteration (256)
! %d0 - %d14 = zero source for the block stores
!
ENTRY(drmach_bc_bzero)
! verify size is >= 256 bytes
! The size is a full-width quantity, so this unsigned compare
! has to branch on the same condition codes as the loop branch
! further down. Branching on the 32 bit condition codes would
! only look at the low half of the size and wrongly reject a
! size such as 4GB, whose low 32 bits are zero.
cmp %o1, 256
blu,a,pn %ncc, .bz_done
mov 1, %o0 ! error code 1 for invalid size
! verify size is a multiple of 256
! (only low order bits are tested, so the 32 bit codes suffice)
btst (256-1), %o1
bnz,a .bz_done
mov 1, %o0 ! error code 1 for invalid size
! verify that vaddr is aligned for block stores
btst (64-1), %o0
bnz,a .bz_done
mov 2, %o0 ! error code 2 for invalid alignment
! save fprs for restore when finished
rd %fprs, %g1
! make sure FPU is enabled
! wrpr xors its two operands, so clearing PSTATE_PEF and then
! xoring it back in yields the saved %pstate with PEF set.
rdpr %pstate, %g3
btst PSTATE_PEF, %g3
bnz .bz_block
nop
andn %g3, PSTATE_PEF, %g4
wrpr %g4, PSTATE_PEF, %pstate
.bz_block:
membar #StoreStore|#StoreLoad|#LoadStore
wr %g0, FPRS_FEF, %fprs
! Clear block
fzero %d0
fzero %d2
fzero %d4
fzero %d6
fzero %d8
fzero %d10
fzero %d12
fzero %d14
wr %g0, ASI_BLK_COMMIT_P, %asi
mov 256, %o3
ba .bz_doblock
nop
! Each pass clears 256 bytes as four 64 byte block stores. The
! store to offset 192 is issued from the annulled delay slot of
! the loop branch, so it only happens when another pass is due.
.bz_blkstart:
! stda %d0, [%o0+192]%asi ! in dly slot of branch that got us here
stda %d0, [%o0+128]%asi
stda %d0, [%o0+64]%asi
stda %d0, [%o0]%asi
add %o0, %o3, %o0
sub %o1, %o3, %o1
.bz_doblock:
cmp %o1, 256
bgeu,a %ncc, .bz_blkstart
stda %d0, [%o0+192]%asi
.bz_finish:
membar #StoreLoad|#StoreStore
clr %o0
wr %g1, %fprs ! restore fprs
btst PSTATE_PEF, %g3 ! restore pstate if necessary
bnz .bz_done
nop
wrpr %g3, %g0, %pstate
.bz_done:
membar #Sync
retl
nop
SET_SIZE(drmach_bc_bzero)
#endif /* lint */