i86_subr.s revision 79ec9da85c2648e2e165ce68612ad0cb6e185618
1N/A * The contents of this file are subject to the terms of the 1N/A * Common Development and Distribution License (the "License"). 1N/A * You may not use this file except in compliance with the License. 1N/A * See the License for the specific language governing permissions 1N/A * and limitations under the License. 1N/A * When distributing Covered Code, include this CDDL HEADER in each 1N/A * If applicable, add the following below this CDDL HEADER, with the 1N/A * fields enclosed by brackets "[]" replaced with your own identifying 1N/A * information: Portions Copyright [yyyy] [name of copyright owner] 1N/A * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. 1N/A * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 1N/A * Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. 1N/A * Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T 1N/A * All Rights Reserved 1N/A * Copyright (c) 2009, Intel Corporation. 1N/A * All rights reserved. 1N/A * General assembly language routines. 1N/A * It is the intent of this file to contain routines that are 1N/A * independent of the specific kernel architecture, and those that are 1N/A * common across kernel architectures. 1N/A * As architectures diverge, and implementations of specific 1N/A * architecture-dependent routines change, the routines should be moved 1N/A * from this file into the respective ../`arch -k`/subr.s file. 1N/A * Catch lofault faults. Like setjmp except it returns one 1N/A * if code following causes uncorrectable fault. Turned off 1N/A * by calling no_fault(). * Default trampoline code for on_trap() (see <sys/ontrap.h>). We just * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called. * Push a new element on to the t_ontrap stack. Refer to <sys/ontrap.h> for * more information about the on_trap() mechanism. If the on_trap_data is the * same as the topmost stack element, we just modify that element. je 0f
/* don't modify t_ontrap */ je 0f
/* don't modify t_ontrap */ movl %
eax,
4(%
esp)
/* put %eax back on the stack */ * Setjmp and longjmp implement non-local gotos using state vectors #
endif /* LABEL_PC != 0 */ movq (%
rdi), %
rdx /* return address; LABEL_PC is 0 */ movl (%
edx), %
ecx /* %eip (return addr); LABEL_PC is 0 */ * if a() calls b() calls caller(), * caller() returns return address in a(). * (Note: We assume a() and b() are C routines which do the normal entry/exit * if a() calls callee(), callee() returns the movq (%
rsp), %
rax /* callee()'s return pc, in a() */ movl (%
esp), %
eax /* callee()'s return pc, in a() */ * return the current frame pointer * Invalidate a single page table entry in the TLB * Get/Set the value of various control registers * "lock mov %cr0" is used on processors which indicate it is * supported via CPUID. Normally the 32 bit TPR is accessed via /* rdx contains input arg3: hints */ .
byte 0x0f,
0x01,
0xc8 /* monitor */ .
byte 0x0f,
0x01,
0xc8 /* monitor */ .
byte 0x0f,
0x01,
0xc9 /* mwait */ .
byte 0x0f,
0x01,
0xc9 /* mwait */ .
byte 0x0f,
0x01,
0xf9 /* rdtscp instruction */ .
byte 0x0f,
0x01,
0xf9 /* rdtscp instruction */ * Do not use this function for obtaining the clock tick. This * is called by callers who do not need to have a guaranteed * correct tick value. The proper routine to use is tsc_read(). * Insert entryp after predp in a doubly linked list. movq %
rax, (%
rdi)
/* entryp->forw = predp->forw */ movl %
eax, (%
ecx)
/* entryp->forw = predp->forw */ * Remove entryp from a doubly linked list movq %
rax, (%
rdx)
/* entry->back->forw = entry->forw */ movl %
eax, (%
edx)
/* entry->back->forw = entry->forw */ * non-NULL bytes in string argument. * This is close to a simple transliteration of a C version of this * routine. We should either just -make- this be a C version, or * justify having it in assembler by making it significantly faster. * if ((uintptr_t)s < KERNELBASE) testl $
3, %
eax /* if %eax not word aligned */ movl (%
eax), %
edx /* move 1 word from (%eax) to %edx */ addl $
0x7f7f7f7f, %
ecx /* %ecx += 0x7f7f7f7f */ andl $
0x80808080, %
ecx /* %ecx &= 0x80808080 */ cmpl $
0x80808080, %
ecx /* if no null byte in this word */ subl $
4, %
eax /* post-incremented */ cmpb $0, (%
eax)
/* if a byte in (%eax) is null */ testl $
3, %
eax /* if %eax not word aligned */ .
string "strlen: argument below kernelbase" * Berkeley 4.3 introduced symbolically named interrupt levels * as a way to deal with priority in a machine-independent fashion. * Numbered priorities are machine-specific, and should be * discouraged where possible. * Note, for the machine-specific priorities there are * examples listed for devices that use a particular priority. * It should not be construed that all devices of that * type should be at that priority. It is currently where * the current devices fit into the priority scheme based * upon time criticality. * The underlying assumption of these assignments is that * IPL 10 is the highest level from which a device * routine can call wakeup. Devices that interrupt from higher * levels are restricted in what they can do. If they need * kernel services they should schedule a routine at a lower * level (via software interrupt) to do the required processing. * Examples of this higher usage: * 14 Profiling clock (and PROM uart polling clock) * The serial ports request lower level processing on level 6. * Also, almost all splN routines (where N is a number or a * mnemonic) will do a RAISE(), on the assumption that they are * never used to lower our priority. * The exceptions are: * spl8() Because you can't be above 15 to begin with! * splzs() Because this is used at boot time to lower our * priority, to allow the PROM to poll the uart. * spl0() Used to lower priority to 0. int spl0(
void) {
return (0); }
/*
 * C stub for spl6(): takes no arguments and unconditionally
 * returns 0.
 */
int
spl6(void)
{
	return (0);
}
/*
 * C stub for spl7(): takes no arguments and unconditionally
 * returns 0.
 */
int
spl7(void)
{
	return (0);
}
/*
 * C stub for spl8(): takes no arguments and unconditionally
 * returns 0.
 */
int
spl8(void)
{
	return (0);
}
/*
 * C stub for splhi(): takes no arguments and unconditionally
 * returns 0.
 */
int
splhi(void)
{
	return (0);
}
/*
 * C stub for splzs(): takes no arguments and unconditionally
 * returns 0.
 */
int
splzs(void)
{
	return (0);
}
call splr;
/* invoke common splr code */ \
addl $
4, %
esp;
/* unstack args */ \
/* locks out all interrupts, including memory errors */ /* just below the level that profiling runs */ /* sun specific - highest priority onboard serial i/o asy ports */ SETPRI(
12)
/* Can't be a RAISE, as it's used to lower us */ /* allow all interrupts */ /* splx implementation */ * Read and write the %gs register cmpb $
0xff, %
al /
If port's not there, we should get back 0xFF jz 2f / is clear, jump to perform the reset andb $0xfe, %al / otherwise, outb (%dx) / clear bit 0 first, then outb (%dx) / and reset the system testl $RESET_METHOD_PCI, pc_reset_methods testl $RESET_METHOD_PCI, pc_reset_methods(%rip) / Try the PCI (soft) reset vector (should work on all modern systems, / but has been shown to cause problems on 450NX systems, and some newer / systems (e.g. ATI IXP400-equipped systems)) / When resetting via this method, 2 writes are required. The first / targets bit 1 (0=hard reset without power cycle, 1=hard reset with / The reset occurs on the second write, during bit 2's
transition from * C callable in and out routines * The arguments and saved registers are on the stack in the * If additional values are pushed onto the stack, make sure * to adjust the following constants accordingly. * The arguments and saved registers are on the stack in the * If additional values are pushed onto the stack, make sure * to adjust the following constants accordingly. * Input a stream of 32-bit words. * NOTE: count is a DWORD count. * Output a stream of bytes * NOTE: count is a byte count * Output a stream of 32-bit words * NOTE: count is a DWORD count rep;
ret /* use 2 byte return instruction when branch target */ /* AMD Software Optimization Guide - Section 6.2 */ decq %
rsi /* (fix post-increment) */ dec %
esi /* post-incremented */ * Replacement functions for ones that are normally inlined. * In addition to the copy in i86.il, they are defined here just in case. * Synthesize the PS_IE bit from the event mask bit * Synthesize the PS_IE bit from the event mask bit /* htonll(), ntohll(), htonl(), ntohl(), htons(), ntohs() * These functions reverse the byte order of the input parameter and returns * the result. This is to convert the byte order from host byte order * (little endian) to network byte order (big endian), or vice versa. /* XX64 there must be shorter sequences for this */ /* XX64 there must be better sequences for this */ * Since we're -really- running unprivileged, our attempt * to change the state of the IF bit will be ignored. * The virtual IF bit is tweaked by CLI and STI. * Since we're -really- running unprivileged, our attempt * to change the state of the IF bit will be ignored. * The virtual IF bit is tweaked by CLI and STI. * Synthesize the PS_IE bit from the event mask bit * Synthesize the PS_IE bit from the event mask bit * Since we're -really- running unprivileged, our attempt * to change the state of the IF bit will be ignored. The * virtual IF bit is tweaked by CLI and STI. * Since we're -really- running unprivileged, our attempt * to change the state of the IF bit will be ignored. The * virtual IF bit is tweaked by CLI and STI. rep;
ret /* use 2 byte return instruction when branch target */ /* AMD Software Optimization Guide - Section 6.2 */ rep;
ret /* use 2 byte return instruction when branch target */ /* AMD Software Optimization Guide - Section 6.2 */ * Checksum routine for Internet Protocol Headers unsigned int sum)
/* partial checksum */ unsigned int psum = 0;
/* partial sum */ while ((
psum >>
16) != 0) {
while ((
psum >>
16) != 0) {
.
string "ip_ocsum: address 0x%p below kernelbase\n" /* partial sum in %edx */ /* XX64 opportunities for prefetch? */ /* XX64 compute csum with 64 bit quantities? */ adcl 60(%
rsi), %
eax /* could be adding -1 and -1 with a carry */ adcl $0, %
eax /* could be adding -1 in eax with a carry */ shrl $
16, %
eax /* adding two halves of 32 bit checksum */ adcl 60(%
esi), %
eax /* We could be adding -1 and -1 with a carry */ adcl $0, %
eax /* we could be adding -1 in eax with a carry */ shrl $
16, %
eax /* adding two halves of 32 bit checksum */ * multiply two long numbers and yield a u_longlong_t result, callable from C. * Provided to manipulate hrtime_t values. shrq $
3, %
rsi /* convert %rsi from byte to quadword count */ movq %
rsi, %
rcx /* move count into rep control register */ movq %
rdi, %
rsi /* move addr into lodsq control reg. */ rep;
ret /* use 2 byte return instruction when branch target */ /* AMD Software Optimization Guide - Section 6.2 */ movl 16(%
esp), %
ecx /* move 2nd arg into rep control register */ shrl $
2, %
ecx /* convert from byte count to word count */ movl 12(%
esp), %
esi /* move 1st arg into lodsw control register */ .
byte 0xf3 /* rep prefix. lame assembler. sigh. */ * Only a few of the hardware control registers or descriptor tables * are directly accessible to us, so just zero the structure. * XXPV Perhaps it would be helpful for the hypervisor to return * virtualized versions of these for post-mortem use. * (Need to reevaluate - perhaps it already does!) * Dump what limited information we can * Only a few of the hardware control registers or descriptor tables * are directly accessible to us, so just zero the structure. * XXPV Perhaps it would be helpful for the hypervisor to return * virtualized versions of these for post-mortem use. * (Need to reevaluate - perhaps it already does!) * Dump what limited information we can * A panic trigger is a word which is updated atomically and can only be set * once. We atomically store 0xDEFACEDD and load the old value. If the * previous value was 0, we succeed and return 1; otherwise return 0. * This allows a partially corrupt trigger to still trigger correctly. DTrace * has its own version of this function to allow it to panic correctly from * The panic() and cmn_err() functions invoke vpanic() as a common entry point * into the panic code implemented in panicsys(). vpanic() is responsible * for passing through the format string and arguments, and constructing a * regs structure on the stack into which it saves the current register * values. If we are not dying due to a fatal trap, these registers will * then be preserved in panicbuf as the current processor state. Before * invoking panicsys(), vpanic() activates the first panic trigger (see * common/os/panic.c) and switches to the panic_stack if successful. Note that * DTrace takes a slightly different panic path if it must panic from probe * context. Instead of calling panic, it calls into dtrace_vpanic(), which * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and * branches back into vpanic(). 
* The panic_trigger result is in %eax from the call above, and * dtrace_panic places it in %eax before branching here. * The rdmsr instructions that follow below will clobber %eax so * we stash the panic_trigger result in %r11d. * If panic_trigger() was successful, we are the first to initiate a * panic: we now switch to the reserved panic_stack before continuing. * Now that we've got everything set up, store the register values as * they were when we entered vpanic() to the designated location in * the regs structure we allocated on the stack. * panicsys(format, alist, rp, on_panic_stack) * If panic_trigger() was successful, we are the first to initiate a * panic: we now switch to the reserved panic_stack before continuing. * Now that we've got everything set up, store the register values as * they were when we entered vpanic() to the designated location in * the regs structure we allocated on the stack. * Synthesize the PS_IE bit from the event mask bit * initialized to a non zero value to make pc_gethrtime() * work correctly even before clock is initialized * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously, * hres_last_tick can only be modified while holding CLOCK_LOCK). * At worst, performing this now instead of under CLOCK_LOCK may * introduce some jitter in pc_gethrestime(). cmpb $0, (%
rax)
/* possible to get lock? */ * compute the interval since last time hres_tick was called * and adjust hrtime_base and hrestime accordingly * hrtime_base is an 8 byte value (in nsec), hrestime is * a timestruc_t (sec, nsec) * Now that we have CLOCK_LOCK, we can update hres_last_tick * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously, * hres_last_tick can only be modified while holding CLOCK_LOCK). * At worst, performing this now instead of under CLOCK_LOCK may * introduce some jitter in pc_gethrestime(). * compute the interval since last time hres_tick was called * and adjust hrtime_base and hrestime accordingly * hrtime_base is an 8 byte value (in nsec), hrestime is * timestruc_t (sec, nsec) * void prefetch_smap_w(void *) * Prefetch ahead within a linear list of smap structures. * Not implemented for ia32. Stub for compatibility. rep;
ret /* use 2 byte return instruction when branch target */ /* AMD Software Optimization Guide - Section 6.2 */ * prefetch_page_r(page_t *) * issue prefetch instructions for a page_t rep;
ret /* use 2 byte return instruction when branch target */ /* AMD Software Optimization Guide - Section 6.2 */ .
string "bcmp: arguments below kernelbase" leave /* restore stack */ leave /* restore stack */ * Save flags, do a 'cli' then return the saved flags * Restore the saved flags * Save flags, do a 'cli' then return the saved flags * Restore the saved flags rep;
ret /* use 2 byte instruction when branch target */ /* AMD Software Optimization Guide - Section 6.2 */ * Synthesize the PS_IE bit from the event mask bit * Synthesize the PS_IE bit from the event mask bit * VMware implements an I/O port that programs can query to detect if software * is running in a VMware hypervisor. This hypervisor port behaves differently * depending on magic values in certain registers and modifies some registers