/* common_asm.s revision 646e55b6807cdf761fecd1e4095d73116cdefdb5 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* General assembly language routines.
* It is the intent of this file to contain routines that are
* specific to cpu architecture.
*/
/*
* WARNING: If you add a fast trap handler which can be invoked by a
* non-privileged user, you may have to use the FAST_TRAP_DONE macro
* instead of "done" instruction to return back to the user mode. See
* comments for the "fast_trap_done" entry point for more information.
*/
#define	FAST_TRAP_DONE	\
	ba,a	fast_trap_done

#if defined(CHEETAH)
/*
* Override GET_NATIVE_TIME for the cpu module code. This is not
* guaranteed to be exactly one instruction, be careful of using
* the macro in delay slots.
*
* Do not use any instruction that modifies condition codes as the
* caller may depend on these to remain unchanged across the macro.
*/
#elif defined(HUMMINGBIRD)
#include <sys/spitregs.h>
/*
* The current Hummingbird versions of %stick and %stick_cmp
* are each implemented as two 32-bit locations in ASI_IO space;
* the hardware should support atomic r/w; meanwhile: ugly alert! ...
*
* 64-bit opcodes are required, but move only 32-bits:
*
* ldxa [phys]ASI_IO, %dst reads the low 32-bits from phys into %dst
* stxa %src, [phys]ASI_IO writes the low 32-bits from %src into phys
*
* reg equivalent [phys]ASI_IO
* ------------------ ---------------
* %stick_cmp low-32 0x1FE.0000.F060
* %stick_cmp high-32 0x1FE.0000.F068
* %stick low-32 0x1FE.0000.F070
* %stick high-32 0x1FE.0000.F078
*/
/*
* Any change in the number of instructions in SETL41()
* will affect SETL41_OFF
*/
/*
* SETL41_OFF is used to calculate the relative PC value when a
* branch instruction needs to jump over the SETL41() macro
*/
#define SETL41_OFF 16
/*
* reading stick requires 2 loads, and there could be an intervening
* low-to-high 32-bit rollover resulting in a return value that is
* off by about (2 ^ 32); this rare case is prevented by re-reading
* the low-32 bits after the high-32 and verifying the "after" value
* is >= the "before" value; if not, increment the high-32 value.
*
* this method is limited to 1 rollover, and based on the fixed
* stick-frequency (5555555), requires the loads to complete within
* 773 seconds; incrementing the high-32 value will not overflow for
* about 52644 years.
*
* near 0xffffffff, there could be another rollover (also rare).
* to prevent this, we first write a 0 to the low-32, then write
* new values to the high-32 then the low-32.
*
* When we detect a carry in the lower %stick register, we need to
* read HST_HIGH again. However, at the point where we detect this,
* we need to rebuild the register address HST_HIGH. This involves
* more than one instruction, so a branch is unavoidable. Most of
* the time there is no carry, so we pay the branch penalty only in
* the (less frequent) carry case.
*
* For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
* For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
* addr already points to HST_LOW.
*
* NOTE: this method requires disabling interrupts before using
* DELTA_NATIVE_TIME.
*/
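/*
 * A rough C sketch (not part of the build) of the read and write
 * protocols described above. stick_read32()/stick_write32() are
 * hypothetical stand-ins for the ldxa/stxa ASI_IO accesses, and the
 * read side is shown as a retry loop -- a conservative variant of
 * the increment-on-carry trick the comment describes:
 *
 *	uint64_t
 *	stick_read(void)
 *	{
 *		uint32_t before, high, after;
 *
 *		do {
 *			before = stick_read32(HST_LOW);
 *			high = stick_read32(HST_HIGH);
 *			after = stick_read32(HST_LOW);
 *		} while (after < before);	// carry: re-read HST_HIGH
 *
 *		return (((uint64_t)high << 32) | after);
 *	}
 *
 *	void
 *	stick_write(uint64_t v)
 *	{
 *		stick_write32(HST_LOW, 0);	// park low away from rollover
 *		stick_write32(HST_HIGH, (uint32_t)(v >> 32));
 *		stick_write32(HST_LOW, (uint32_t)v);
 *	}
 */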
#else /* !CHEETAH && !HUMMINGBIRD */
#ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
/*
* Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
* The failure occurs only when the following instruction decodes to wr or
* wrpr. The workaround is to immediately follow writes to TICK_COMPARE
* with a read, thus stalling the pipe and keeping following instructions
* from causing data corruption. Aligning to a quadword will ensure these
* two instructions are not split due to i$ misses.
*/
#define	WR_TICKCMPR(cmpr,scr1,scr2,label)	\
	ba,a	.bb_errata_1.label		;\
	.align	64				;\
.bb_errata_1.label:				;\
	wr	cmpr, TICK_COMPARE		;\
	rd	TICK_COMPARE, %g0
#else	/* BB_ERRATA_1 */
#define	WR_TICKCMPR(in,scr1,scr2,label)		\
	wr	in, TICK_COMPARE
#endif	/* BB_ERRATA_1 */
#endif /* !CHEETAH && !HUMMINGBIRD */
#if defined(lint)
#include <sys/lockstat.h>
#endif /* lint */
#include <sys/asm_linkage.h>
#include <sys/privregs.h>
#include <sys/machthread.h>
#include <sys/psr_compat.h>
#include <sys/isa_defs.h>
#include <sys/dditypes.h>
#if !defined(lint)
#include "assym.h"
#endif /* !lint */
#if defined(lint)
int
get_impl(void)
{ return (0); }
#else /* lint */
#endif /* lint */
#if defined(lint)
/*
* Softint generated when counter field of tick reg matches value field
* of tick_cmpr reg
*/
/*ARGSUSED*/
void
tickcmpr_set(uint64_t clock_cycles)
{}
#else /* lint */
1:
2:
#endif /* lint */
#if defined(lint)
void
tickcmpr_disable(void)
{}
#else /* lint */
#endif /* lint */
#if defined(lint)
/*
* tick_write_delta() increments %tick by the specified delta. This should
* only be called after a CPR event to assure that gethrtime() continues to
* increase monotonically. Obviously, writing %tick needs to be done very
* carefully to avoid introducing unnecessary %tick skew across CPUs. For
* this reason, we make sure we're i-cache hot before actually writing to
* %tick.
*/
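/*
 * Hedged usage sketch (variable names illustrative): a CPR resume path
 * that must keep gethrtime() monotonic could apply the delta like this:
 *
 *	uint64_t saved = tick_at_suspend;	// captured before suspend
 *	uint64_t now = gettick();
 *	if (now < saved)
 *		tick_write_delta(saved - now);
 */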
/*ARGSUSED*/
void
tick_write_delta(uint64_t delta)
{}
#else /* lint */
#ifdef DEBUG
.seg ".text"
.asciz "tick_write_delta: interrupts already disabled on entry"
#endif /* DEBUG */
	ENTRY_NP(tick_write_delta)
	rdpr	%pstate, %g1		! %g1 = saved %pstate
#ifdef DEBUG
	andcc	%g1, PSTATE_IE, %g0	! check that interrupts aren't
	bnz	0f			!   already disabled on entry
	sethi	%hi(tick_write_panic), %o1
save %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
call panic
or %i1, %lo(tick_write_panic), %o0
#endif /* DEBUG */
0: wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts
mov %o0, %o2
ba 0f ! Branch to cache line-aligned instr.
nop
.align 16
0: nop ! The next 3 instructions are now hot.
retl ! Return
	wrpr	%g0, %g1, %pstate	! delay: Re-enable interrupts
	SET_SIZE(tick_write_delta)
#endif /* lint */
#if defined(lint)
/*
* return 1 if disabled
*/
int
tickcmpr_disabled(void)
{ return (0); }
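/*
 * In C terms (bit 63 is the TICK_COMPARE interrupt-disable bit, which
 * is what the TICKINT_DIS_SHFT shift below extracts):
 *
 *	return ((int)(tick_cmpr >> 63));
 */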
#else /* lint */
ENTRY_NP(tickcmpr_disabled)
RD_TICKCMPR(%g1, %o0)
retl
srlx %g1, TICKINT_DIS_SHFT, %o0
SET_SIZE(tickcmpr_disabled)
#endif /* lint */
/*
* Get current tick
*/
#if defined(lint)
u_longlong_t
gettick(void)
{ return (0); }
#else /* lint */
ENTRY(gettick)
GET_NATIVE_TIME(%o0, %o2, %o3)
retl
nop
SET_SIZE(gettick)
#endif /* lint */
/*
* Return the counter portion of the tick register.
*/
#if defined(lint)
uint64_t
gettick_counter(void)
{ return(0); }
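/*
 * The assembly below clears the NPT bit (bit 63 of %tick) with a
 * shift pair; the C equivalent of the trick:
 *
 *	return ((tick << 1) >> 1);	// unsigned shifts drop bit 63
 */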
#else /* lint */
ENTRY_NP(gettick_counter)
rdpr %tick, %o0
sllx %o0, 1, %o0
retl
srlx %o0, 1, %o0 ! shake off npt bit
SET_SIZE(gettick_counter)
#endif /* lint */
/*
* Provide a C callable interface to the trap that reads the hi-res timer.
* Returns 64-bit nanosecond timestamp in %o0 and %o1.
*/
#if defined(lint)
hrtime_t
gethrtime(void)
{
return ((hrtime_t)0);
}
hrtime_t
gethrtime_unscaled(void)
{
return ((hrtime_t)0);
}
hrtime_t
gethrtime_max(void)
{
return ((hrtime_t)0);
}
void
scalehrtime(hrtime_t *hrt)
{
*hrt = 0;
}
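/*
 * A hedged sketch of the conversion scalehrtime()/NATIVE_TIME_TO_NSEC()
 * perform, assuming nsec_scale is a binary fixed-point multiplier
 * precomputed from the tick frequency (the shift name here is
 * illustrative; the real constant lives in the platform clock.h):
 *
 *	*hrt = (hrtime_t)(((uint64_t)*hrt * nsec_scale) >> NSEC_SHIFT);
 */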
void
gethrestime(timespec_t *tp)
{
tp->tv_sec = 0;
tp->tv_nsec = 0;
}
time_t
gethrestime_sec(void)
{
return (0);
}
void
gethrestime_lasttick(timespec_t *tp)
{
tp->tv_sec = 0;
tp->tv_nsec = 0;
}
/*ARGSUSED*/
void
hres_tick(void)
{
}
void
panic_hres_tick(void)
{
}
#else /* lint */
ENTRY_NP(gethrtime)
GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
! %g1 = hrtime
retl
mov %g1, %o0
SET_SIZE(gethrtime)
ENTRY_NP(gethrtime_unscaled)
GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
retl
mov %g1, %o0
SET_SIZE(gethrtime_unscaled)
ENTRY_NP(gethrtime_waitfree)
ALTENTRY(dtrace_gethrtime)
GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
retl
mov %g1, %o0
SET_SIZE(dtrace_gethrtime)
SET_SIZE(gethrtime_waitfree)
ENTRY(gethrtime_max)
NATIVE_TIME_MAX(%g1)
NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)
1:	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime_max)
/*
* Fast trap to return a timestamp, uses trap window, leaves traps
* disabled. Returns a 64-bit nanosecond timestamp in %o0 and %o1.
*
* This is the handler for the ST_GETHRTIME trap.
*/
/*
* Macro to convert GET_HRESTIME() bits into a timestamp.
*
* We use two separate macros so that the platform-dependent GET_HRESTIME()
* can be as small as possible; CONV_HRESTIME() implements the generic part.
*/
nop; /* delay: do nothing :( */ \
4:
/*
* Similar to gethrestime(), but gethrestime_sec() returns current hrestime
* seconds.
*/
/*
* Returns the hrestime on the last tick. This is simpler than gethrestime()
* and gethrestime_sec(): no conversion is required. gethrestime_lasttick()
* follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
* outlined in detail in clock.h. (Unlike GET_HRESTIME/GET_HRTIME, we don't
* rely on load dependencies to effect the membar #LoadLoad, instead declaring
* it explicitly.)
*/
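/*
 * Reader-side sketch of that algorithm (clock.h has the authoritative
 * description): hres_lock behaves like a sequence lock whose low bit
 * marks a writer in progress, so a reader snapshots, reads, and
 * retries on any change:
 *
 *	do {
 *		lock = hres_lock & ~1;	// snapshot, ignoring writer bit
 *		ts = hrestime;		// membar #LoadLoad goes here
 *	} while (lock != hres_lock);
 */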
0:
/*
* Fast trap for gettimeofday(). Returns a timestruc_t in %o0 and %o1.
*
* This is the handler for the ST_GETHRESTIME trap.
*/
/*
* Fast trap to return lwp virtual time, uses trap window, leaves traps
* disabled. Returns a 64-bit number in %o0:%o1, which is the number
* of nanoseconds consumed.
*
* This is the handler for the ST_GETHRVTIME trap.
*
* Register usage:
* %o0, %o1 = return lwp virtual time
* %o3 = lwp
* %g1 = scratch
* %g5 = scratch
*/
/*
* Subtract start time of current microstate from time
* of day to get increment for lwp virtual time.
*/
/*
* Add current value of ms_acct[LMS_USER]
*/
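/*
 * C sketch of the computation outlined above (microstate fields from
 * the lwp mstate structure; treat the exact names as illustrative):
 *
 *	hrtime_t now = gethrtime_unscaled();
 *	hrtime_t vtime = now - lwp->lwp_mstate.ms_state_start;
 *	vtime += lwp->lwp_mstate.ms_acct[LMS_USER];
 */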
.seg ".text"
.asciz "hrtime_base stepping back"
8:
!
!
!
!
! hrestime_adj == 0 ?
1:
2:
5:
9:
!
!
#endif /* lint */
.seg ".text"
.asciz "kstat_q_exit: qlen == 0"
/*NOTREACHED*/
QRETURN; \
.align 16
.align 16
.align 16
.align 16
.align 16
.align 16
#endif /* !(lint || __lint) */
#ifdef lint
int hres_lock;
int traptrace_use_stick;
#else /* lint */
/*
* -- WARNING --
*
* The following variables MUST be together on a 128-byte boundary.
* In addition to the primary performance motivation (having them all
* on the same cache line(s)), code here and in the GET*TIME() macros
* assumes that they all have the same high 22 address bits (so
* there's only one sethi).
*/
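/*
 * e.g., because they share one %hi() block, a single sethi can serve
 * several of the loads (illustrative):
 *
 *	sethi	%hi(hres_lock), %o1
 *	ld	[%o1 + %lo(hres_lock)], %o2
 *	ldx	[%o1 + %lo(hrtime_base)], %o3	! same %hi() value
 */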
.seg ".data"
/* XXX - above comment claims 128-bytes is necessary */
.align 64
.word 0, 0 /* int64_t */
.word 0, 0 /* hrtime_t */
.nword 0, 0 /* 2 longs */
.word 0, 0 /* int64_t */
.word 0
.word 0
.word 0, 0
.word 0
#endif /* lint */
/*
* usec_delay(int n) [compatibility - should go one day]
* Delay by spinning.
*
* delay for n microseconds. numbers <= 0 delay 1 usec
*
* With UltraSPARC-III the combination of supporting mixed-speed CPUs
* and variable clock rate for power management requires that we
* use %stick to implement this routine.
*
* For OPL platforms that support the "sleep" instruction, we
* conditionally (ifdef'ed) insert a "sleep" instruction in
* the loop. Note that theoretically we should have moved (duplicated)
* the code down to the spitfire/us3/opl-specific asm files, but that
* is a lot of code duplication just to add one "sleep" instruction.
* We chose less code duplication for this.
*/
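/*
 * C sketch of the spin below (the scale-factor name is illustrative;
 * the real code converts microseconds to native ticks before looping):
 *
 *	uint64_t end = gettick() + (uint64_t)n * ticks_per_usec;
 *	while (gettick() < end)
 *		;
 */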
#if defined(lint)
/*ARGSUSED*/
void
drv_usecwait(clock_t n)
{}
/*ARGSUSED*/
void
usec_delay(int n)
{}
#else /* lint */
	ENTRY(drv_usecwait)
	ALTENTRY(usec_delay)
0:
GET_NATIVE_TIME(%o2, %o3, %o4)
add %o1, %o2, %o1
1:
#ifdef _OPL
.word 0x81b01060 ! insert "sleep" instruction
#endif /* _OPL */ ! use byte code for now
cmp %o1, %o2
GET_NATIVE_TIME(%o2, %o3, %o4)
bgeu,pt %xcc, 1b
nop
retl
nop
SET_SIZE(usec_delay)
SET_SIZE(drv_usecwait)
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
void
pil14_interrupt(int level)
{}
#else /* lint */
/*
* Level-14 interrupt prologue.
*/
ENTRY_NP(pil14_interrupt)
CPU_ADDR(%g1, %g2)
rdpr %pil, %g6 ! %g6 = interrupted PIL
stn %g6, [%g1 + CPU_PROFILE_PIL] ! record interrupted PIL
rdpr %tstate, %g6
rdpr %tpc, %g5
btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
bnz,a,pt %xcc, 1f
stn %g5, [%g1 + CPU_PROFILE_PC] ! if so, record kernel PC
stn %g5, [%g1 + CPU_PROFILE_UPC] ! if not, record user PC
ba pil_interrupt_common ! must be large-disp branch
stn %g0, [%g1 + CPU_PROFILE_PC] ! zero kernel PC
1: ba pil_interrupt_common ! must be large-disp branch
stn %g0, [%g1 + CPU_PROFILE_UPC] ! zero user PC
SET_SIZE(pil14_interrupt)
ENTRY_NP(tick_rtt)
!
! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
! disabled. If TICK_COMPARE is enabled, we know that we need to
	! interrupt. In this case, TICK_COMPARE may already have been
	! rewritten to a value in the future.
!
! Note that %o5 is live until after 1f.
! XXX - there is a subroutine call while %o5 is live!
!
RD_TICKCMPR(%o5, %g1)
srlx %o5, TICKINT_DIS_SHFT, %g1
brnz,pt %g1, 2f
nop
rdpr %pstate, %g5
andn %g5, PSTATE_IE, %g1
wrpr %g0, %g1, %pstate ! Disable vec interrupts
sethi %hi(cbe_level14_inum), %o1
ld [%o1 + %lo(cbe_level14_inum)], %o1
call intr_enqueue_req ! preserves %o5 and %g5
mov PIL_14, %o0
rd SOFTINT, %o4
set (TICK_INT_MASK | STICK_INT_MASK), %o0
andcc %o4, %o0, %g0
bz,a,pn %icc, 2f
wrpr %g0, %g5, %pstate ! Enable vec interrupts
wr %o0, CLEAR_SOFTINT
!
! we need to reprogram TICK_COMPARE to fire as soon as possible.
!
GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick
sllx %o0, 1, %o0 ! Clear the DIS bit
srlx %o0, 1, %o0
cmp %o5, %o0 ! In the future?
bg,a,pt %xcc, 2f ! Yes, drive on.
wrpr %g0, %g5, %pstate ! delay: enable vec intr
!
!
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
uint64_t
find_cpufrequency(volatile uchar_t *clock_ptr)
{
return (0);
}
#else /* lint */
#ifdef DEBUG
.seg ".text"
.asciz "find_cpufrequency: interrupts already disabled on entry"
#endif /* DEBUG */
#ifdef DEBUG
#endif /* DEBUG */
0:
3:
1:
2:
#endif /* lint */
#if defined(lint)
/*
* Prefetch a page_t for write or read, this assumes a linear
* scan of sequential page_t's.
*/
/*ARGSUSED*/
void
prefetch_page_w(void *pp)
{}
/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{}
#else /* lint */
#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
	defined(SERRANO)
!
!
!
! prefetch n+4 ahead of where we are now:
!
! 4 * sizeof(page_t) -> 512
! 4 * sizeof(page_t) +64 -> 576
!
! Example
! =======
! contiguous page array in memory...
!
! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
! ^ ^ ^ ^ ^ ^
! pp | pp+4*sizeof(page)+64
! |
! pp+4*sizeof(page)
!
! Prefetch
! Queue
! | XXX1 |
! +-------+
! | YYY1 |  | working with pp = BBB1, but will be enqueueing
! +-------+ | prefetches for both halves of page YYY, while
! |Preftch| | both halves of page XXX are in transit, making
! | YYY2 |<-+ their way into the E$.
! +-------+
! |Preftch|
! | ZZZ1 |
! +-------+
! . .
! : :
!
! E$
! +============================================...
! | XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
! +============================================...
! | | | | | | |
! +============================================...
! .
! :
!
! So we should expect the first four page accesses to stall
! while we warm up the cache, after which most of the pages
! will have their pp ready in the E$.
!
! Also note that if sizeof(page_t) grows beyond 128, then
! the stride constants below will need to be recomputed.
!
! Cheetah+
! ========
!
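!	In C terms the two prefetch streams above amount to (illustrative;
!	prefetch() stands in for the SPARC prefetch instruction):
!
!		prefetch((caddr_t)pp + 4 * sizeof (page_t));		/* STRIDE1 */
!		prefetch((caddr_t)pp + 4 * sizeof (page_t) + 64);	/* STRIDE2 */
!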
#define STRIDE1 512
#define STRIDE2 576
#error "STRIDE1 != (PAGE_SIZE * 4)"
#endif /* STRIDE1 != (PAGE_SIZE * 4) */
#elif defined(SPITFIRE) || defined(HUMMINGBIRD)
!
!
!
!
! and
!
#define STRIDE1 128
#define STRIDE2 192
#error "STRIDE1 != PAGE_SIZE"
#endif /* STRIDE1 != PAGE_SIZE */
#elif defined(OLYMPUS_C)
!
!
#define STRIDE1 0x440
#define STRIDE2 0x640
#else /* OLYMPUS_C */
#error "You need to fix this for your new cpu type."
#endif /* OLYMPUS_C */
#endif /* lint */
#if defined(lint)
/*
* Prefetch struct smap for write.
*/
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{}
#else /* lint */
#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
	defined(SERRANO)
#define	PREFETCH_Q_LEN	8
#elif defined(SPITFIRE) || defined(HUMMINGBIRD)
#define	PREFETCH_Q_LEN	3
#elif defined(OLYMPUS_C)
!
!
#define PREFETCH_Q_LEN 1
#else	/* OLYMPUS_C */
#error	"You need to fix this for your new cpu type."
#endif	/* OLYMPUS_C */
#ifdef SEGKPM_SUPPORT
#define SMAP_SIZE 72
#else /* SEGKPM_SUPPORT */
!
!
!
#define SMAP_SIZE 48
#endif /* SEGKPM_SUPPORT */
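/*
 * Sketch: the smap prefetch distance is simply queue depth times
 * structure size, i.e. in C (illustrative):
 *
 *	prefetch((caddr_t)smp + PREFETCH_Q_LEN * SMAP_SIZE);
 */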
#endif /* lint */
#if defined(lint)
/* ARGSUSED */
uint64_t
getidsr(void)
{ return 0; }
#else /* lint */
#endif /* lint */