locore.s revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
/* All Rights Reserved */
/* Copyright (c) 1987, 1988 Microsoft Corporation */
/* All Rights Reserved */
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>
#include <sys/reboot.h>
#include <sys/x86_archext.h>
#if defined(__lint)
#include <sys/types.h>
#include <sys/thread.h>
#include <sys/systm.h>
#include <sys/lgrp.h>
#include <sys/regset.h>
#include <sys/link.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#else /* __lint */
#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/cmn_err.h>
#include <sys/pit.h>
#include <sys/panic.h>
#include "assym.h"
/*
* Our assumptions:
* - We are running in protected-paged mode.
* - Interrupts are disabled.
* - The GDT and IDT are the caller's; we need our own copies.
* - The kernel's text, initialized data and bss are mapped.
*
* Our actions:
* - Save arguments
* - Initialize our stack pointer to the thread 0 stack (t0stack)
* and leave room for a phony "struct regs".
* - Our GDT and IDT need to get munged.
* - Since we are using the boot's GDT descriptors, we need
* to copy them into our GDT before we switch to ours.
* - We start using our GDT by loading correct values in the
* selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
* gs=KGS_SEL).
* - The default LDT entry for syscall is set.
* - We load the default LDT into the hardware LDT register.
* - We load the default TSS into the hardware task register.
* - Check for cpu type, i.e. 486 vs. P5 vs. P6 etc.
* - mlsetup(%esp) gets called.
* - We change our appearance to look like the real thread 0.
* (NOTE: making ourselves a real thread may be a no-op)
* - main() gets called. (NOTE: main() never returns).
*
* NOW, the real code!
*/
/*
* Globals:
*/
.globl _start
.globl mlsetup
.globl main
.globl panic
.globl t0stack
.globl t0
.globl sysp
.globl edata
/*
* call back into boot - sysp (bootsvcs.h) and bootops (bootconf.h)
*/
.globl bootops
.globl bootopsp
/*
* NOTE: t0stack should be the first thing in the data section so that
* if it ever overflows, it will fault on the last kernel text page.
*/
.data
.comm t0stack, DEFAULTSTKSZ, 32
.comm t0, 4094, 32
#ifdef TRAPTRACE
/*
* trap_trace_ctl must agree with the structure definition in
* <sys/traptrace.h>
*/
DGDEF3(trap_trace_bufsize, CLONGSIZE, CLONGSIZE)
.NWORD TRAP_TSIZE
DGDEF3(trap_trace_freeze, 4, 4)
.long 0
DGDEF3(trap_trace_off, 4, 4)
.long 0
DGDEF3(trap_trace_ctl, _MUL(4, CLONGSIZE), 16)
.NWORD trap_tr0 /* next record */
.NWORD trap_tr0 /* first record */
.NWORD trap_tr0 + TRAP_TSIZE /* limit */
.NWORD 0 /* pad */
#if NCPU != 21
#error "NCPU != 21, Expand padding for trap_trace_ctl"
#endif
/*
* Enough padding for 20 CPUs (no .skip on x86 -- grrrr).
*/
.NWORD 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.NWORD 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.NWORD 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.NWORD 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.NWORD 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
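/*
* Arithmetic check: each trap_trace_ctl entry is 4 NWORDs, so the five
* rows of 16 NWORDs above are 80 NWORDs, i.e. padding for 20 CPUs
* beyond CPU 0's entry at the top -- 21 entries in all, matching the
* build-time check on NCPU above.
*/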
/*
* CPU 0's preallocated TRAPTRACE buffer.
*/
.globl trap_tr0
.comm trap_tr0, TRAP_TSIZE
/*
* A dummy TRAPTRACE entry to use after death.
*/
.globl trap_trace_postmort
.comm trap_trace_postmort, TRAP_ENT_SIZE
#endif /* TRAPTRACE */
#endif /* __lint */
#if defined(__amd64)
#if defined(__lint)
/* ARGSUSED */
void
_start(struct boot_syscalls *sysp, struct bootops *bop, Elf64_Boot *ebp)
{}
#else /* __lint */
ENTRY_NP(_start)
/*
* %rdi = boot services (should die someday)
* %rdx = bootops
*/
leaq edata(%rip), %rbp /* reference edata for ksyms */
movq $0, (%rbp) /* limit stack back trace */
/*
* Initialize our stack pointer to the thread 0 stack (t0stack)
* and leave room for a "struct regs" for lwp0. Note that the
* stack doesn't actually align to a 16-byte boundary until just
* before we call mlsetup because we want to use %rsp to point at
* our regs structure.
*/
leaq t0stack(%rip), %rsp
addq $_CONST(DEFAULTSTKSZ - REGSIZE), %rsp
#if (REGSIZE & 15) == 0
subq $8, %rsp
#endif
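/*
* A note on the arithmetic above: t0stack is 32-byte aligned (see the
* .comm above) and, assuming DEFAULTSTKSZ is a multiple of 16 and
* REGSIZE a multiple of 8, %rsp is now congruent to 8 mod 16 whether
* or not the subq was assembled in. The single pushq %rbp done just
* before the call to mlsetup below then leaves the stack 16-byte
* aligned at the call, as the comment there notes.
*/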
/*
* Save call back for special x86 boot services vector
*/
movq %rdi, sysp(%rip)
movq %rdx, bootops(%rip) /* save bootops */
movq $bootops, bootopsp(%rip)
/*
* Save arguments and flags, if only for debugging ..
*/
movq %rdi, REGOFF_RDI(%rsp)
movq %rsi, REGOFF_RSI(%rsp)
movq %rdx, REGOFF_RDX(%rsp)
movq %rcx, REGOFF_RCX(%rsp)
movq %r8, REGOFF_R8(%rsp)
movq %r9, REGOFF_R9(%rsp)
pushf
popq %r11
movq %r11, REGOFF_RFL(%rsp)
DISABLE_INTR_FLAGS
/*
* Enable write protect and alignment check faults.
*/
movq %cr0, %rax
orq $_CONST(CR0_WP|CR0_AM), %rax
andq $_BITNOT(CR0_WT|CR0_CE), %rax
movq %rax, %cr0
/*
* (We just assert this works by virtue of being here)
*/
orl $X86_CPUID, x86_feature(%rip)
/*
* mlsetup() gets called with a struct regs as argument, while
* main takes no args and should never return.
*/
xorl %ebp, %ebp
movq %rsp, %rdi
pushq %rbp
/* (stack pointer now aligned on 16-byte boundary right here) */
movq %rsp, %rbp
call mlsetup
call main
/* NOTREACHED */
leaq __return_from_main(%rip), %rdi
xorl %eax, %eax
call panic
SET_SIZE(_start)
/*
* Kernel's handler to switch stacks before invoking
* vmx's putchar handler.
*
* This is needed because we use the boot program for
* console services for a -long- time before consconfig
* takes them over. In an ideal world, we'd have a console
* configured earlier.
*
* We use this stack to execute bsys->putchar on, because
* we may be asking vmx/boot to print something using a stack
* that only exists in the 64-bit MMU, e.g. a trapping thread.
*/
.data
.comm _safe_putchar_stack, DEFAULTSTKSZ, 32
ENTRY_NP(_stack_safe_putchar)
.globl _vmx_sysp
movq %rsp, %r11
leaq _safe_putchar_stack+DEFAULTSTKSZ(%rip), %rsp
pushq %r11
movq _vmx_sysp(%rip), %r10
call *BOOTSVCS_PUTCHAR(%r10)
popq %r11
movq %r11, %rsp
ret
SET_SIZE(_stack_safe_putchar)
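/*
* Roughly, in C (an illustrative sketch; bsvc_putchar is an assumed
* member name, not taken from this file):
*
*	void
*	_stack_safe_putchar(int c)
*	{
*		switch %rsp onto _safe_putchar_stack;
*		(*_vmx_sysp->bsvc_putchar)(c);
*		restore the original %rsp;
*	}
*/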
#endif /* __amd64 */
#endif /* __lint */
#if !defined(__lint)
__return_from_main:
.string "main() returned"
__unsupported_cpu:
.string "486 style cpu detected - no longer supported!"
#endif /* !__lint */
#if !defined(__amd64)
#if defined(__lint)
/*ARGSUSED*/
void
_start(struct boot_syscalls *sysp, struct bootops *bop, Elf32_Boot *ebp)
{}
#else /* __lint */
ENTRY_NP(_start)
/*
* We jump to __start to make sure the function
* enable_big_page_support does not span multiple 4K pages. This
* is essential, since we need to turn paging off in
* enable_big_page_support.
*/
jmp __start
/*
* It is very important that this function sit right here in the file.
* This function should not span multiple pages. This function has
* to be executed from an address such that we have a 1-1 mapping.
* First we disable paging, then we or %ebx into %cr4, and then we
* re-enable paging. We return to 0xe00xxxxx with paging enabled.
*/
ENTRY_NP(enable_big_page_support)
movl %cr0, %eax
and $_BITNOT(CR0_PG), %eax
movl %eax, %cr0 / disable paging
movl %cr3, %eax
movl %eax, %cr3
movl %cr4, %eax
orl %ebx, %eax
movl %eax, %cr4
nop
movl %cr0, %eax
orl $CR0_PG, %eax / enable paging
movl %eax, %cr0
jmp enable_big_page_support_done
enable_big_page_support_done:
xorl %eax, %eax
ret
SET_SIZE(enable_big_page_support)
/*
* This function enables physical address extension thereby
* switching from 32 bit pte's to 64 bit pte's
*
* It loads the argument that it received in %ebx into cr3
*
* XXX is there some reason why this routine -can't- use
* normal C parameter passing conventions??
*/
ENTRY_NP(enable_pae)
/*
* disable paging
*/
movl %cr0, %eax
andl $_BITNOT(CR0_PG), %eax
movl %eax, %cr0
/*
* enable physical address extension
*/
movl %cr4, %eax
orl $CR4_PAE, %eax
movl %eax, %cr4
/*
* set new page table base
*/
movl %ebx, %cr3
/*
* restart paging
*/
movl %cr0, %eax
orl $CR0_PG, %eax
movl %eax, %cr0
jmp enable_pae_done /* jmp is required after enabling paging */
enable_pae_done:
/*
* flush TLBs just in case and return
*/
movl %cr3, %eax
movl %eax, %cr3
orl $CR4_PAE, cr4_value
ret
SET_SIZE(enable_pae)
__start:
/*
* %ecx = boot services (should die someday)
* %ebx = bootops
*/
mov $edata, %ebp / edata needs to be defined for ksyms
movl $0, (%ebp) / limit stack back trace
/*
* Initialize our stack pointer to the thread 0 stack (t0stack)
* and leave room for a phony "struct regs".
*/
movl $t0stack + DEFAULTSTKSZ - REGSIZE, %esp
/*
* Save call back for special x86 boot services vector
*/
mov %ecx, sysp / save call back for boot services
mov %ebx, bootops / save bootops
movl $bootops, bootopsp
DISABLE_INTR_FLAGS
/*
* Save all registers and flags
*/
pushal
pushfl
/*
* Override bios settings and enable write protect and
* alignment check faults.
*/
movl %cr0, %eax
/*
* enable WP for detecting faults, and enable alignment checking.
*/
orl $_CONST(CR0_WP|CR0_AM), %eax
andl $_BITNOT(CR0_WT|CR0_CE), %eax
movl %eax, %cr0 / set the cr0 register correctly and
/ override the BIOS setup
/*
* If bit 21 of eflags can be flipped, then cpuid is present
* and enabled.
*/
pushfl
popl %ecx
movl %ecx, %eax
xorl $PS_ID, %eax / try complemented bit
pushl %eax
popfl
pushfl
popl %eax
cmpl %eax, %ecx
jne have_cpuid
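/*
* In C terms, the probe above does the following (an illustrative
* sketch; getflags/setflags stand in for the pushfl/popfl pairs and
* are not functions defined here):
*
*	ulong_t flags = getflags();
*	setflags(flags ^ PS_ID);
*	if (getflags() != flags)
*		goto have_cpuid;
*
* If the complemented ID bit sticks, the cpu implements cpuid.
*/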
/*
* cpuid may be disabled on Cyrix; try to detect Cyrix with the 5/2 test.
* div does not modify the cc flags on Cyrix. Even though this may
* also be true for other vendors, it is generally true only for
* newer models from those vendors that support and do not disable
* cpuid (usually because cpuid cannot be disabled).
*/
/*
* clear cc flags
*/
xorb %ah, %ah
sahf
/*
* perform 5/2 test
*/
movw $5, %ax
movb $2, %bl
divb %bl
lahf
cmpb $2, %ah
jne cpu_486
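/*
* Why comparing %ah against 2 works: sahf stored %ah == 0 into the
* flags, but bit 1 of EFLAGS always reads back as 1, so a subsequent
* lahf returns exactly 0x02 if and only if the div left the flags
* untouched, which is the Cyrix behavior being probed for.
*/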
/*
* div did not modify the cc flags; chances are the vendor is Cyrix.
* Assume the vendor is Cyrix and use the CCR's to enable cpuid.
*/
.set CYRIX_CRI, 0x22 / CR Index Register
.set CYRIX_CRD, 0x23 / CR Data Register
.set CYRIX_CCR3, 0xc3 / Config Control Reg 3
.set CYRIX_CCR4, 0xe8 / Config Control Reg 4
.set CYRIX_DIR0, 0xfe / Device Identification Reg 0
.set CYRIX_DIR1, 0xff / Device Identification Reg 1
/*
* Even if the cpu vendor is Cyrix and the motherboard/chipset
* vendor decided to ignore lines A1-A4 for I/O addresses, I/O port
* 0x21 corresponds with 0x23; since 0x22 is still untouched, the
* reads and writes of 0x21 are guaranteed to be off-chip of the cpu.
*/
/*
* enable read of ISR at I/O port 0x20
*/
movb $0xb, %al
outb $MCMD_PORT
/*
* read IMR and store in %bl
*/
inb $MIMR_PORT
movb %al, %bl
/*
* mask out all interrupts so that ISR will not change
*/
movb $0xff, %al
outb $MIMR_PORT
/*
* reads of I/O port 0x22 on Cyrix are always directed off-chip
* make use of I/O pull-up to test for an unknown device on 0x22
*/
inb $CYRIX_CRI
cmpb $0xff, %al
je port_22_free
/*
* motherboard/chipset vendor may be ignoring line A1 of I/O address
*/
movb %al, %cl
/*
* if the ISR and the value read from 0x22 do not match then we have
* detected some unknown device, probably a chipset, at 0x22
*/
inb $MCMD_PORT
cmpb %al, %cl
jne restore_IMR
port_22_free:
/*
* now test to see if some unknown device is using I/O port 0x23
*
* read the external I/O port at 0x23
*/
inb $CYRIX_CRD
/*
* Test for pull-up at 0x23 or if I/O address line A1 is being ignored.
* IMR is 0xff so both tests are performed simultaneously.
*/
cmpb $0xff, %al
jne restore_IMR
/*
* We are a Cyrix part. In case we are some model of Cx486 or a Cx586,
* record the type and fix it later if not.
*/
movl $X86_VENDOR_Cyrix, x86_vendor
movl $X86_TYPE_CYRIX_486, x86_type
/*
* Try to read CCR3. All Cyrix cpu's which support cpuid have CCR3.
*
* load CCR3 index into CCR index register
*/
movb $CYRIX_CCR3, %al
outb $CYRIX_CRI
/*
* If we are not a Cyrix cpu, then we have performed an external I/O
* cycle. If the CCR index was not valid for this Cyrix model, we may
* have performed an external I/O cycle as well. In these cases and
* if the motherboard/chipset vendor ignores I/O address line A1,
* then the PIC will have IRQ3 set at the lowest priority as a side
* effect of the above outb. We are reasonably confident that there
* is not an unknown device on I/O port 0x22, so there should have been
* no unpredictable side-effect of the above outb.
*/
/*
* read CCR3
*/
inb $CYRIX_CRD
/*
* If we are not a Cyrix cpu, the inb above produced an external I/O
* cycle. If we are a Cyrix model that does not support CCR3, we
* produced an external I/O cycle as well. In all known Cyrix models
* 6x86 and above, bit 3 of CCR3 is reserved and cannot be set to 1. In
* all Cyrix models prior to the 6x86 that supported CCR3, bits 4-7 are
* reserved as well. It is highly unlikely that CCR3 contains the value
* 0xff. We test to see whether I/O port 0x23 reads back as the pull-up
* value or as the IMR, and if so deduce that we are not a Cyrix with
* support for cpuid.
*/
cmpb $0xff, %al
je restore_PIC
/*
* There exist 486 ISA Cyrix chips that support CCR3 but do not support
* DIR0 and DIR1. If we try to read DIR0, we may generate external I/O
* cycles, the exact behavior is model specific and undocumented.
* Unfortunately these external I/O cycles may confuse some PIC's beyond
* recovery. Fortunately we can use the following undocumented trick:
* if bit 4 of CCR3 can be toggled, then DIR0 and DIR1 are supported.
* Pleasantly MAPEN contains bit 4 of CCR3, so this trick is guaranteed
* to work on all Cyrix cpu's which support cpuid.
*/
movb %al, %dl
xorb $0x10, %dl
movb %al, %cl
/*
* write CCR3 back with bit 4 toggled
*/
movb $CYRIX_CCR3, %al
outb $CYRIX_CRI
movb %dl, %al
outb $CYRIX_CRD
/*
* read CCR3
*/
movb $CYRIX_CCR3, %al
outb $CYRIX_CRI
inb $CYRIX_CRD
movb %al, %dl
/*
* restore CCR3
*/
movb $CYRIX_CCR3, %al
outb $CYRIX_CRI
movb %cl, %al
outb $CYRIX_CRD
/*
* if bit 4 was not toggled, DIR0 and DIR1 are not supported, in which
* case we do not have cpuid anyway
*/
andb $0x10, %al
andb $0x10, %dl
cmpb %al, %dl
je restore_PIC
/*
* read DIR0
*/
movb $CYRIX_DIR0, %al
outb $CYRIX_CRI
inb $CYRIX_CRD
/*
* test for pull-up
*/
cmpb $0xff, %al
je restore_PIC
/*
* Values of 0x20-0x27 in DIR0 are currently reserved by Cyrix for
* future use. If Cyrix ever produces a cpu that supports cpuid with
* these ids, the following test will have to change. For now we remain
* pessimistic since the formats of the CCR's may be different then.
*
* test for at least a 6x86, to see if we support both MAPEN and CPUID
*/
cmpb $0x30, %al
jb restore_IMR
/*
* enable MAPEN
*/
movb $CYRIX_CCR3, %al
outb $CYRIX_CRI
andb $0xf, %cl
movb %cl, %al
orb $0x10, %al
outb $CYRIX_CRD
/*
* select CCR4
*/
movb $CYRIX_CCR4, %al
outb $CYRIX_CRI
/*
* read CCR4
*/
inb $CYRIX_CRD
/*
* enable cpuid
*/
orb $0x80, %al
movb %al, %dl
/*
* select CCR4
*/
movb $CYRIX_CCR4, %al
outb $CYRIX_CRI
/*
* write CCR4
*/
movb %dl, %al
outb $CYRIX_CRD
/*
* select CCR3
*/
movb $CYRIX_CCR3, %al
outb $CYRIX_CRI
/*
* disable MAPEN and write CCR3
*/
movb %cl, %al
outb $CYRIX_CRD
/*
* restore IMR
*/
movb %bl, %al
outb $MIMR_PORT
/*
* test to see if cpuid available
*/
pushfl
popl %ecx
movl %ecx, %eax
xorl $PS_ID, %eax / try complemented bit
pushl %eax
popfl
pushfl
popl %eax
cmpl %eax, %ecx
jne have_cpuid
jmp cpu_486
restore_PIC:
/*
* In case the motherboard/chipset vendor is ignoring line A1 of the
* I/O address, we set the PIC priorities to sane values.
*/
movb $0xc7, %al / irq 7 lowest priority
outb $MCMD_PORT
restore_IMR:
movb %bl, %al
outb $MIMR_PORT
jmp cpu_486
have_cpuid:
/*
* cpuid instruction present
*/
orl $X86_CPUID, x86_feature
movl $0, %eax
cpuid
movl %ebx, cpu_vendor
movl %edx, cpu_vendor+4
movl %ecx, cpu_vendor+8
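/*
* cpuid leaf 0 returns the 12-byte vendor string in the order
* %ebx, %edx, %ecx, which is why the three stores above are
* interleaved that way; e.g. "CyrixInstead" arrives as
* %ebx = "Cyri", %edx = "xIns", %ecx = "tead".
*/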
/*
* early cyrix cpus are somewhat strange and need to be
* probed in curious ways to determine their identity
*/
leal cpu_vendor, %esi
leal CyrixInstead, %edi
movl $12, %ecx
repz
cmpsb
je vendor_is_cyrix
/ let mlsetup()/cpuid_pass1() handle everything else in C
jmp cpu_done
is486:
/*
* test to see if a useful cpuid
*/
testl %eax, %eax
jz isa486
movl $1, %eax
cpuid
movl %eax, %ebx
andl $0xF00, %ebx
cmpl $0x400, %ebx
je isa486
ret
isa486:
/*
* lose the return address
*/
popl %eax
jmp cpu_486
vendor_is_cyrix:
call is486
/*
* Processor signature and feature flags for Cyrix are insane.
* BIOS can play with semi-documented registers, so cpuid must be used
* cautiously. Since we are a Cyrix that has cpuid, we have DIR0 and
* DIR1. Keep the family in %ebx and feature flags in %edx until they
* are no longer needed.
*/
/*
* read DIR0
*/
movb $CYRIX_DIR0, %al
outb $CYRIX_CRI
inb $CYRIX_CRD
/*
* First we handle the cases where we are a 6x86 or 6x86L.
* The 6x86 is basically a 486, the only reliable bit in the
* feature flags is for FPU. The 6x86L is better, unfortunately
* there is no really good way to distinguish between these two
* cpu's. We are pessimistic and when in doubt assume 6x86.
*/
cmpb $0x40, %al
jae maybeGX
/*
* We are an M1, either a 6x86 or 6x86L.
*/
cmpb $0x30, %al
je maybe6x86L
cmpb $0x31, %al
je maybe6x86L
cmpb $0x34, %al
je maybe6x86L
cmpb $0x35, %al
je maybe6x86L
/*
* although it is possible that we are a 6x86L, the cpu and
* documentation are so buggy that we just do not care.
*/
jmp likely6x86
maybe6x86L:
/*
* read DIR1
*/
movb $CYRIX_DIR1, %al
outb $CYRIX_CRI
inb $CYRIX_CRD
cmpb $0x22, %al
jb likely6x86
/*
* We are a 6x86L, or at least a 6x86 with honest cpuid feature flags
*/
movl $X86_TYPE_CYRIX_6x86L, x86_type
jmp coma_bug
likely6x86:
/*
* We are likely a 6x86, or a 6x86L without a way of knowing
*
* The 6x86 has NO Pentium or Pentium Pro compatible features even
* though it claims to be a Pentium Pro compatible!
*
* The 6x86 core used in the 6x86 may have most of the Pentium system
* registers and largely conform to the Pentium System Programming
* Reference. Documentation on these parts is long gone. Treat it as
* a crippled Pentium and hope for the best.
*/
movl $X86_TYPE_CYRIX_6x86, x86_type
jmp coma_bug
maybeGX:
/*
* Now we check whether we are a MediaGX or GXm. We have particular
* reason for concern here. Even though most of the GXm's
* report having TSC in the cpuid feature flags, the TSC may be
* horribly broken. What is worse is that MediaGX's are basically
* 486's while the good GXm's are more like Pentium Pro's!
*/
cmpb $0x50, %al
jae maybeM2
/*
* We are either a MediaGX (sometimes called a Gx86) or GXm
*/
cmpb $41, %al
je maybeMediaGX
cmpb $44, %al
jb maybeGXm
cmpb $47, %al
jbe maybeMediaGX
/*
* We do not honestly know what we are, so assume a MediaGX
*/
jmp media_gx
maybeGXm:
/*
* It is still possible we are either a MediaGX or GXm; trust cpuid.
* The family should be 5 on a GXm.
*/
cmpl $0x500, %ebx
je GXm
/*
* BIOS/Cyrix might set family to 6 on a GXm
*/
cmpl $0x600, %ebx
jne media_gx
GXm:
movl $X86_TYPE_CYRIX_GXm, x86_type
jmp cpu_done
maybeMediaGX:
/*
* read DIR1
*/
movb $CYRIX_DIR1, %al
outb $CYRIX_CRI
inb $CYRIX_CRD
cmpb $0x30, %al
jae maybeGXm
/*
* we are a MediaGX for which we do not trust cpuid
*/
media_gx:
movl $X86_TYPE_CYRIX_MediaGX, x86_type
jmp cpu_486
maybeM2:
/*
* Now we check whether we are a 6x86MX or MII. These cpu's are
* virtually identical, but we care because for the 6x86MX, we
* must work around the coma bug. Also for 6x86MX prior to revision
* 1.4, the TSC may have serious bugs.
*/
cmpb $0x60, %al
jae maybeM3
/*
* family should be 6, but BIOS/Cyrix might set it to 5
*/
cmpl $0x600, %ebx
ja cpu_486
/*
* read DIR1
*/
movb $CYRIX_DIR1, %al
outb $CYRIX_CRI
inb $CYRIX_CRD
cmpb $0x8, %al
jb cyrix6x86MX
cmpb $0x80, %al
jb MII
cyrix6x86MX:
/*
* It is altogether unclear how the revision stamped on the cpu
* maps to the values in DIR0 and DIR1. Just assume TSC is broken.
*/
movl $X86_TYPE_CYRIX_6x86MX, x86_type
jmp coma_bug
MII:
movl $X86_TYPE_CYRIX_MII, x86_type
likeMII:
jmp cpu_done
maybeM3:
/*
* We are some chip that we cannot identify yet, an MIII perhaps.
* We will be optimistic and hope that the chip is much like an MII,
* and that cpuid is sane. Cyrix seemed to have gotten it right in
* time for the MII, we can only hope it stayed that way.
* Maybe the BIOS or Cyrix is trying to hint at something.
*/
cmpl $0x500, %ebx
je GXm
cmpb $0x80, %al
jae likelyM3
/*
* Just test for the features Cyrix is known for
*/
jmp MII
likelyM3:
/*
* DIR0 with values from 0x80 to 0x8f indicates a VIA Cyrix III, aka
* the Cyrix MIII. There may be parts later that use the same ranges
* for DIR0 with special values in DIR1, maybe the VIA CIII, but for
* now we will call anything with a DIR0 of 0x80 or higher an MIII.
* The MIII is supposed to support large pages, but we will believe
* it when we see it. For now we just enable and test for MII features.
*/
movl $X86_TYPE_VIA_CYRIX_III, x86_type
jmp likeMII
coma_bug:
/*
* With NO_LOCK set to 0 in CCR1, the usual state that BIOS enforces, some
* bus cycles are issued with LOCK# asserted. With NO_LOCK set to 1, all bus
* cycles except page table accesses and interrupt ACK cycles do not assert
* LOCK#. xchgl is an instruction that asserts LOCK# if NO_LOCK is set to 0.
*
* Due to a bug in the cpu core involving over-optimization of branch
* prediction, register renaming, and execution of instructions down both the
* X and Y pipes for the xchgl instruction, short loops can be written that
* never de-assert LOCK# from one invocation of the loop to the next, ad
* infinitum. The undesirable effect of this situation is that interrupts are
* not serviced.
*
* The ideal workaround to this bug would be to set NO_LOCK to 1.
* Unfortunately, bus cycles that would otherwise have asserted LOCK# no
* longer do, unless they are page table accesses or interrupt ACK cycles.
* With LOCK# not asserted, these bus cycles are now cached. This can cause
* undesirable behaviour if the ARR's are not configured correctly. Solaris
* does not configure the ARR's, nor does it provide any useful mechanism for
* doing so; thus the ideal workaround is not viable.
*
* Fortunately, the only known exploits for this bug involve the xchgl
* instruction specifically. There is a group of undocumented registers on
* Cyrix 6x86, 6x86L, and 6x86MX cpu's which can be used to specify one
* instruction as a serializing instruction. With the xchgl instruction
* serialized, LOCK# is still asserted, but it is the sole instruction for
* which LOCK# is asserted. There is now some added penalty for the xchgl
* instruction, but the usual bus locking is preserved.
*
* This ingenious workaround was discovered by disassembling a binary provided
* by Cyrix as a workaround for this bug on Windows, but it's not documented
* anywhere by Cyrix, nor is the bug actually mentioned in any public errata!
* The only concern for this workaround is that there may be similar
* undiscovered bugs with other instructions that assert LOCK# that may be
* leveraged to similar ends. The fact that Cyrix fixed this bug sometime late
* in 1997 and no exploits other than xchgl have been discovered is a good
* indication that this workaround is reasonable.
*/
.set CYRIX_DBR0, 0x30 / Debug Register 0
.set CYRIX_DBR1, 0x31 / Debug Register 1
.set CYRIX_DBR2, 0x32 / Debug Register 2
.set CYRIX_DBR3, 0x33 / Debug Register 3
.set CYRIX_DOR, 0x3c / Debug Opcode Register
/*
* What is known about DBR1, DBR2, DBR3, and DOR is that for normal
* cpu execution DBR1, DBR2, and DBR3 are set to 0. To obtain opcode
* serialization, DBR1, DBR2, and DBR3 are loaded with 0xf8, 0x7f,
* and 0x00, as the code below does. Then, DOR is loaded with the
* one byte opcode to be serialized (0x87, xchgl).
*/
/*
* select CCR3
*/
movb $CYRIX_CCR3, %al
outb $CYRIX_CRI
/*
* read CCR3 and mask out MAPEN
*/
inb $CYRIX_CRD
andb $0xf, %al
/*
* save masked CCR3 in %ah
*/
movb %al, %ah
/*
* select CCR3
*/
movb $CYRIX_CCR3, %al
outb $CYRIX_CRI
/*
* enable MAPEN
*/
movb %ah, %al
orb $0x10, %al
outb $CYRIX_CRD
/*
* read DBR0
*/
movb $CYRIX_DBR0, %al
outb $CYRIX_CRI
inb $CYRIX_CRD
/*
* disable MATCH and save in %bh
*/
orb $0x80, %al
movb %al, %bh
/*
* write DBR0
*/
movb $CYRIX_DBR0, %al
outb $CYRIX_CRI
movb %bh, %al
outb $CYRIX_CRD
/*
* write DBR1
*/
movb $CYRIX_DBR1, %al
outb $CYRIX_CRI
movb $0xf8, %al
outb $CYRIX_CRD
/*
* write DBR2
*/
movb $CYRIX_DBR2, %al
outb $CYRIX_CRI
movb $0x7f, %al
outb $CYRIX_CRD
/*
* write DBR3
*/
movb $CYRIX_DBR3, %al
outb $CYRIX_CRI
xorb %al, %al
outb $CYRIX_CRD
/*
* write DOR
*/
movb $CYRIX_DOR, %al
outb $CYRIX_CRI
movb $0x87, %al
outb $CYRIX_CRD
/*
* enable MATCH
*/
movb $CYRIX_DBR0, %al
outb $CYRIX_CRI
movb %bh, %al
andb $0x7f, %al
outb $CYRIX_CRD
/*
* disable MAPEN
*/
movb $0xc3, %al
outb $CYRIX_CRI
movb %ah, %al
outb $CYRIX_CRD
jmp cpu_done
cpu_done:
popfl /* Restore original FLAGS */
popal /* Restore all registers */
/*
* mlsetup(%esp) gets called.
*/
pushl %esp
call mlsetup
addl $4, %esp
/*
* We change our appearance to look like the real thread 0.
* (NOTE: making ourselves to be a real thread may be a noop)
* main() gets called. (NOTE: main() never returns).
*/
call main
/* NOTREACHED */
pushl $__return_from_main
call panic
/* NOTREACHED */
cpu_486:
pushl $__unsupported_cpu
call panic
SET_SIZE(_start)
#endif /* __lint */
#endif /* !__amd64 */
/*
* For stack layout, see reg.h
* When cmntrap gets called, the error code and trap number have been pushed.
*/
#if defined(__lint)
/* ARGSUSED */
void
cmntrap()
{}
#else /* __lint */
.globl trap /* C handler called below */
#if defined(__amd64)
ENTRY_NP2(cmntrap, _cmntrap)
TRAP_PUSH
/*
* We must first check if DTrace has set its NOFAULT bit. This
* regrettably must happen before the TRAPTRACE data is recorded,
* because recording the TRAPTRACE data includes obtaining a stack
* trace -- which requires a call to getpcstack() and may induce
* recursion if an fbt::getpcstack: enabling is inducing the bad load.
*/
movl %gs:CPU_ID, %eax
shlq $CPU_CORE_SHIFT, %rax
leaq cpu_core(%rip), %r8
addq %r8, %rax
movw CPUC_DTRACE_FLAGS(%rax), %cx
andw $CPU_DTRACE_NOFAULT, %cx
jnz .dtrace_induced
TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
TRACE_REGS(%rdi, %rsp, %rbx, %rcx) /* Uses label 9 */
TRACE_STAMP(%rdi) /* Clobbers %eax, %edx, uses 9 */
/*
* Getting a stack trace as part of TRACE_STACK() requires a call to
* getpcstack(). Before we can do this, we must move our stack pointer
* into the base pointer, and we must save the value of %cr2. %cr2
* must be saved before the call because getpcstack() may be
* instrumented by DTrace -- and any invalid load in the DTrace
* enabling will clobber %cr2.
*/
movq %rsp, %rbp
movq %cr2, %r12
TRACE_STACK(%rdi)
/ XXX - check for NMIs????
/ If this is a debug trap, save %dr6 in the pcb
cmpl $T_SGLSTP, REGOFF_TRAPNO(%rbp)
jne .L11
movq %gs:CPU_LWP, %rax
orq %rax, %rax
jz .L11 /* null lwp pointer; can't happen? */
movq %db6, %rcx
movq %rcx, LWP_PCB_DRSTAT(%rax)
movl $0, %ecx
movq %rcx, %db6 /* clear debug status register */
.L11:
movq %rbp, %rdi
movq %r12, %rsi
movl %gs:CPU_ID, %edx
/
/ We know that this isn't a DTrace non-faulting load; we can now safely
/ reenable interrupts. (In the case of pagefaults, we enter through an
/ interrupt gate.)
/
ENABLE_INTR_FLAGS
call trap /* trap(rp, addr, cpuid) handles all traps */
jmp _sys_rtt
.dtrace_induced:
movq %rsp, %rbp
testw $CPL_MASK, REGOFF_CS(%rbp) /* test CS for user-mode trap */
jnz 2f /* if from user, panic */
cmpl $T_PGFLT, REGOFF_TRAPNO(%rbp)
je 0f /* if a pagefault, record the bad address below */
cmpl $T_GPFLT, REGOFF_TRAPNO(%rbp)
jne 3f /* if not a GP fault either, panic */
/*
* If we've taken a GPF, we don't (unfortunately) have the address that
* induced the fault. So instead of setting the fault to BADADDR,
* we'll set the fault to ILLOP.
*/
orw $CPU_DTRACE_ILLOP, %cx
movw %cx, CPUC_DTRACE_FLAGS(%rax)
jmp 1f
0:
orw $CPU_DTRACE_BADADDR, %cx
movw %cx, CPUC_DTRACE_FLAGS(%rax) /* set fault to bad addr */
movq %cr2, %rcx
movq %rcx, CPUC_DTRACE_ILLVAL(%rax)
/* fault addr is illegal value */
1:
movq REGOFF_RIP(%rbp), %rdi
movq %rdi, %r12
call dtrace_instr_size
addq %rax, %r12
movq %r12, REGOFF_RIP(%rbp)
INTR_POP
iretq
2:
leaq dtrace_badflags(%rip), %rdi
xorl %eax, %eax
call panic
3:
leaq dtrace_badtrap(%rip), %rdi
xorl %eax, %eax
call panic
SET_SIZE(cmntrap)
SET_SIZE(_cmntrap)
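/*
* A C-level sketch of the .dtrace_induced path above (illustrative
* only; it mirrors the assembly rather than any compiled code):
*
*	if (trap came from user mode)
*		panic("bad DTrace flags");
*	if (rp->r_trapno == T_PGFLT) {
*		flags |= CPU_DTRACE_BADADDR;
*		cpu_core[id].cpuc_dtrace_illval = getcr2();
*	} else if (rp->r_trapno == T_GPFLT) {
*		flags |= CPU_DTRACE_ILLOP;
*	} else {
*		panic("bad DTrace trap");
*	}
*	rp->r_rip += dtrace_instr_size(rp->r_rip);
*	iretq back, skipping the faulting instruction;
*/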
#elif defined(__i386)
ENTRY_NP2(cmntrap, _cmntrap)
INTR_PUSH
TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
TRACE_REGS(%edi, %esp, %ebx, %ecx) /* Uses label 9 */
TRACE_STAMP(%edi) /* Clobbers %eax, %edx, uses 9 */
movl %esp, %ebp
/ XXX - check for NMIs????
/ If this is a debug trap, save %dr6 in the pcb
cmpl $T_SGLSTP, REGOFF_TRAPNO(%ebp)
jne .L11
movl %gs:CPU_LWP, %eax
orl %eax, %eax
jz .L11 /* null lwp pointer; can't happen? */
movl %db6, %ecx
movl %ecx, LWP_PCB_DRSTAT(%eax)
movl $0, %ecx
movl %ecx, %db6 /* clear debug status register */
.L11:
pushl %gs:CPU_ID
movl %cr2, %eax / fault address for PGFLTs
pushl %eax
pushl %ebp
/*
* We must now check if DTrace has set its NOFAULT bit.
*/
movl %gs:CPU_ID, %eax
shll $CPU_CORE_SHIFT, %eax
addl $cpu_core, %eax
movw CPUC_DTRACE_FLAGS(%eax), %cx
andw $CPU_DTRACE_NOFAULT, %cx
jnz 0f
/*
* We know that this isn't a DTrace non-faulting load; we can now safely
* reenable interrupts. (In the case of pagefaults, we enter through an
* interrupt gate.)
*/
ENABLE_INTR_FLAGS
call trap /* trap(rp, addr, cpuid) handles all traps */
addl $12, %esp /* get argument off stack */
jmp _sys_rtt
0:
testw $CPL_MASK, REGOFF_CS(%ebp) /* test CS for user-mode trap */
jnz 2f /* if from user, panic */
cmpl $T_PGFLT, REGOFF_TRAPNO(%ebp)
je 0f /* if a pagefault, record the bad address below */
cmpl $T_GPFLT, REGOFF_TRAPNO(%ebp)
jne 3f /* if not a GP fault either, panic */
/*
* If we've taken a GPF, we don't (unfortunately) have the address that
* induced the fault. So instead of setting the fault to BADADDR,
* we'll set the fault to ILLOP.
*/
orw $CPU_DTRACE_ILLOP, %cx
movw %cx, CPUC_DTRACE_FLAGS(%eax)
jmp 1f
0:
orw $CPU_DTRACE_BADADDR, %cx
movw %cx, CPUC_DTRACE_FLAGS(%eax) /* set fault to bad addr */
movl %cr2, %ecx
movl %ecx, CPUC_DTRACE_ILLVAL(%eax)
/* fault addr is illegal value */
1:
pushl REGOFF_EIP(%ebp)
call dtrace_instr_size
addl $16, %esp
movl REGOFF_EIP(%ebp), %ecx
addl %eax, %ecx
movl %ecx, REGOFF_EIP(%ebp)
INTR_POP_KERNEL
iret
2:
pushl $dtrace_badflags
call panic
3:
pushl $dtrace_badtrap
call panic
SET_SIZE(cmntrap)
SET_SIZE(_cmntrap)
#endif /* __i386 */
/*
* Declare a uintptr_t which has the size of _cmntrap to enable stack
* traceback code to know when a regs structure is on the stack.
*/
.globl _cmntrap_size
.align CLONGSIZE
_cmntrap_size:
.NWORD . - _cmntrap
.type _cmntrap_size, @object
dtrace_badflags:
.string "bad DTrace flags"
dtrace_badtrap:
.string "bad DTrace trap"
#endif /* __lint */
#if defined(__lint)
/* ARGSUSED */
void
cmninttrap()
{}
#else /* __lint */
.globl trap /* C handler called below */
#if defined(__amd64)
ENTRY_NP(cmninttrap)
INTR_PUSH
TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
TRACE_REGS(%rdi, %rsp, %rbx, %rcx) /* Uses label 9 */
TRACE_STAMP(%rdi) /* Clobbers %eax, %edx, uses 9 */
DISABLE_INTR_FLAGS
movq %rsp, %rbp
movl %gs:CPU_ID, %edx
xorl %esi, %esi
movq %rsp, %rdi
call trap /* trap(rp, addr, cpuid) handles all traps */
jmp _sys_rtt
SET_SIZE(cmninttrap)
#elif defined(__i386)
ENTRY_NP(cmninttrap)
INTR_PUSH
DISABLE_INTR_FLAGS
movl %esp, %ebp
movl %esp, %ecx
pushl %gs:CPU_ID
pushl $0 /* fake cr2 (cmntrap is used for page faults) */
pushl %ecx
call trap /* trap(rp, addr, cpuid) handles all traps */
addl $12, %esp /* get argument off stack */
jmp _sys_rtt
SET_SIZE(cmninttrap)
#endif /* __i386 */
#endif /* __lint */
#if defined(__lint)
/* ARGSUSED */
void
dtrace_trap()
{}
#else /* __lint */
.globl dtrace_user_probe
#if defined(__amd64)
ENTRY_NP(dtrace_trap)
INTR_PUSH
TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
TRACE_REGS(%rdi, %rsp, %rbx, %rcx) /* Uses label 9 */
TRACE_STAMP(%rdi) /* Clobbers %eax, %edx, uses 9 */
movq %rsp, %rbp
movl %gs:CPU_ID, %edx
movq %cr2, %rsi
movq %rsp, %rdi
ENABLE_INTR_FLAGS
call dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
jmp _sys_rtt
SET_SIZE(dtrace_trap)
#elif defined(__i386)
ENTRY_NP(dtrace_trap)
INTR_PUSH
TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
TRACE_REGS(%edi, %esp, %ebx, %ecx) /* Uses label 9 */
TRACE_STAMP(%edi) /* Clobbers %eax, %edx, uses 9 */
movl %esp, %ebp
pushl %gs:CPU_ID
movl %cr2, %eax
pushl %eax
pushl %ebp
pushl $F_ON
popfl
call dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
addl $12, %esp /* get argument off stack */
jmp _sys_rtt
SET_SIZE(dtrace_trap)
#endif /* __i386 */
#endif /* __lint */
/*
* Return from _sys_trap routine.
*/
#if defined(__lint)
void
lwp_rtt_initial(void)
{}
void
lwp_rtt(void)
{}
void
_sys_rtt(void)
{}
#else /* __lint */
#if defined(__amd64)
ENTRY_NP(lwp_rtt_initial)
movq %gs:CPU_THREAD, %r15
movq T_STACK(%r15), %rsp /* switch to the thread stack */
call __dtrace_probe___proc_start
jmp _lwp_rtt
ENTRY_NP(lwp_rtt)
/*
* r14 lwp
* rdx lwp->lwp_procp
* r15 curthread
*/
movq %gs:CPU_THREAD, %r15
movq T_STACK(%r15), %rsp /* switch to the thread stack */
_lwp_rtt:
call __dtrace_probe___proc_lwp__start
movq %gs:CPU_LWP, %r14
movq LWP_PROCP(%r14), %rdx
/*
* XX64 Is the stack misaligned correctly at this point?
* If not, we need to do a push before calling anything ..
*/
#if defined(DEBUG)
/*
* If we were to run lwp_savectx at this point -without-
* RUPDATE_PENDING being set, we'd end up sampling the hardware
* state left by the previous running lwp, rather than setting
* the values requested by the lwp creator. Bad.
*/
testl $RUPDATE_PENDING, PCB_FLAGS(%r14)
jne 1f
leaq _no_pending_updates(%rip), %rdi
movl $__LINE__, %esi
movq %r14, %rdx
xorl %eax, %eax
call panic
_no_pending_updates:
.string "%M%:%d lwp_rtt(lwp %p) but no RUPDATE_PENDING"
1:
#endif
/*
* If agent lwp, clear %fs and %gs
*/
cmpq %r15, P_AGENTTP(%rdx)
jne 1f
xorl %ecx, %ecx
movq %rcx, REGOFF_FS(%rsp)
movq %rcx, REGOFF_GS(%rsp)
movw %cx, LWP_PCB_FS(%r14)
movw %cx, LWP_PCB_GS(%r14)
1:
call dtrace_systrace_rtt
movq REGOFF_RDX(%rsp), %rsi
movq REGOFF_RAX(%rsp), %rdi
call post_syscall /* post_syscall(rval1, rval2) */
movq %cr0, %rax
orq $CR0_TS, %rax
movq %rax, %cr0 /* set up to take fault */
/* on first use of fp */
ALTENTRY(_sys_rtt)
testw $CPL_MASK, REGOFF_CS(%rsp) /* test %cs for user mode return */
jz sr_sup
/*
* Return to user
*/
cmpw $UCS_SEL, REGOFF_CS(%rsp) /* test for native (64-bit) lwp? */
je sys_rtt_syscall
/*
* Return to 32-bit userland, checking for an AST, and (optionally)
* tweaking any changed segment registers on the way out.
*/
ALTENTRY(sys_rtt_syscall32)
movq %gs:CPU_THREAD, %rax
cli /* disable interrupts */
cmpb $0, T_ASTFLAG(%rax) /* test for signal or AST flag */
jne _handle_ast
movq T_LWP(%rax), %r14
movl PCB_FLAGS(%r14), %edx
testl $RUPDATE_PENDING, %edx
jne _update_sregs
/*
* The following label is branched to by the sys_syscall32 handler,
* so that if the iretq below takes a #gp fault, kern_gpfault() can
* detect and handle it.
*/
ALTENTRY(_set_user32_regs)
USER32_POP
iretq
/*NOTREACHED*/
/*
* Return to 64-bit userland, checking for an AST, and (optionally)
* tweaking any changed segment registers on the way out.
*/
ALTENTRY(sys_rtt_syscall)
movq %gs:CPU_THREAD, %rax
cli /* disable interrupts */
cmpb $0, T_ASTFLAG(%rax) /* test for signal or AST flag */
jne _handle_ast
movq T_LWP(%rax), %r14
movl PCB_FLAGS(%r14), %edx
testl $RUPDATE_PENDING, %edx
jne _update_sregs
_set_user_regs:
USER_POP
iretq
/*NOTREACHED*/
/*
* XX64 Don't understand why we ever come here.
* If we do come here then we can probably recover by just
* branching straight to _sys_rtt
*/
bad_set_user_regs:
leaq __bad_set_user_regs_msg(%rip), %rdi
movq %rsp, %rsi
movq REGOFF_CS(%rsp), %rdx
xorl %eax, %eax
call panic
__bad_set_user_regs_msg:
.string "bad_set_user_regs: rp=%lx rp->r_cs=%lx; how did that happen?"
/*
* Update the segment registers with new values from the pcb
*
* We have to do this carefully, and in the following order,
* in case any of the selectors points at a bogus descriptor.
* If they do, we'll take a trap that will end in delivering
* the application a weird SIGSEGV via kern_gpfault().
*
* This is particularly tricky for %gs.
*
* Note also the slightly brittle nature of the dependencies
* between this code and kern_gpfault().
*/
ENTRY_NP(_update_sregs)
movw LWP_PCB_GS(%r14), %bx
movl $MSR_AMD_GSBASE, %ecx
rdmsr
movw %bx, %gs
/*
* If that instruction failed because the new %gs is a bad %gs,
* we'll be taking a trap but with the original %gs and %gsbase
* undamaged (i.e. pointing at curcpu). Otherwise, we've just
* mucked up the kernel's gsbase. Oops. Make the newly computed
* gsbase be the hidden gs, and fix the kernel's gsbase back again.
*/
#if defined(OPTERON_ERRATUM_88)
mfence
#endif
swapgs
wrmsr /* repair kernel gsbase */
movw %bx, REGOFF_GS(%rsp)
/*
* Only override the descriptor base address if r_gs == LWPGS_SEL
* or if r_gs == NULL.
*
* A note on NULL descriptors -- 32-bit programs take faults if
* they dereference NULL descriptors; however, when 64-bit programs
* load them into %fs or %gs, they DON'T fault -- only the base
* address remains whatever it was from the last load. Urk.
*
* Avoid any leakage by setting the base address to an inaccessible
* region.
*/
cmpw $0, %bx
jne 0f
movl $0x80000000, LWP_PCB_GSBASE(%r14)
movl $0xffffffff, LWP_PCB_GSBASE+4(%r14)
jmp _set_gsbase
0: cmpw $LWPGS_SEL, %bx
jne 1f
_set_gsbase:
movl $MSR_AMD_KGSBASE, %ecx
movl LWP_PCB_GSBASE(%r14), %eax
movl LWP_PCB_GSBASE+4(%r14), %edx
wrmsr /* update hidden gsbase for userland */
movl %eax, REGOFF_GSBASE(%rsp)
movl %edx, REGOFF_GSBASE+4(%rsp)
1: movw LWP_PCB_FS(%r14), %bx
movw %bx, %fs
movw %bx, REGOFF_FS(%rsp)
/*
* Only override the descriptor base address if r_fs == LWPFS_SEL
* or if r_fs == NULL.
*/
cmpw $0, %bx
jne 2f
movl $0x80000000, LWP_PCB_FSBASE(%r14)
movl $0xffffffff, LWP_PCB_FSBASE+4(%r14)
jmp _set_fsbase
2: cmpw $LWPFS_SEL, %bx
jne 3f
_set_fsbase:
movl $MSR_AMD_FSBASE, %ecx
movl LWP_PCB_FSBASE(%r14), %eax
movl LWP_PCB_FSBASE+4(%r14), %edx
wrmsr /* update fsbase for userland */
movl %eax, REGOFF_FSBASE(%rsp)
movl %edx, REGOFF_FSBASE+4(%rsp)
3:
movw LWP_PCB_ES(%r14), %bx
movw %bx, %es
movw %bx, REGOFF_ES(%rsp)
movw LWP_PCB_DS(%r14), %bx
movw %bx, %ds
movw %bx, REGOFF_DS(%rsp)
andl $_BITNOT(RUPDATE_PENDING), PCB_FLAGS(%r14)
jmp _set_user_regs
.globl _update_sregs_done
_update_sregs_done:
/*NOTREACHED*/
SET_SIZE(_update_sregs)
_handle_ast:
/* Let trap() handle the AST */
sti /* enable interrupts */
movl $T_AST, REGOFF_TRAPNO(%rsp) /* set trap type to AST */
movq %rsp, %rdi
xorl %esi, %esi
movl %gs:CPU_ID, %edx
call trap /* trap(rp, 0, cpuid) */
jmp _sys_rtt
/*NOTREACHED*/
/*
* Return to supervisor
*/
ALTENTRY(sr_sup)
/* Check for a kernel preemption request */
cmpb $0, %gs:CPU_KPRUNRUN
jne sr_sup_preempt /* preemption */
/*
* Restore regs before doing iretq to kernel mode
*/
set_sup_regs:
cmpw $KCS_SEL, REGOFF_CS(%rsp) /* test %cs for return to user mode */
jne bad_set_user_regs /* ... (this should not be necessary) */
/*
* If we interrupted the mutex_exit() critical region we must
* reset the PC back to the beginning to prevent missed wakeups.
* See the comments in mutex_exit() for details.
*/
movq REGOFF_PC(%rsp), %rdx
leaq mutex_exit_critical_start(%rip), %r8
subq %r8, %rdx
cmpq mutex_exit_critical_size(%rip), %rdx
jae .mutex_exit_check_done
movq %r8, REGOFF_PC(%rsp)
/* restart mutex_exit() */
.mutex_exit_check_done:
INTR_POP
iretq
sr_sup_preempt:
movq %gs:CPU_THREAD, %rbx
cmpb $0, T_PREEMPT_LK(%rbx) /* Already in kpreempt? */
jne set_sup_regs /* if so, get out of here */
movb $1, T_PREEMPT_LK(%rbx) /* otherwise, set t_preempt_lk */
sti /* it's now safe to unmask interrupts */
movl $1, %edi /* indicate asynchronous call */
call kpreempt /* kpreempt(1) */
cli /* re-mask interrupts */
movb $0, T_PREEMPT_LK(%rbx) /* clear t_preempt_lk */
jmp set_sup_regs
SET_SIZE(sr_sup)
SET_SIZE(lwp_rtt)
SET_SIZE(lwp_rtt_initial)
SET_SIZE(_sys_rtt)
SET_SIZE(sys_rtt_syscall)
SET_SIZE(sys_rtt_syscall32)
#elif defined(__i386)
ENTRY_NP(lwp_rtt_initial)
movl %gs:CPU_THREAD, %eax
movl T_STACK(%eax), %esp /* switch to the thread stack */
call __dtrace_probe___proc_start
jmp _lwp_rtt
ENTRY_NP(lwp_rtt)
movl %gs:CPU_THREAD, %eax
movl T_STACK(%eax), %esp /* switch to the thread stack */
_lwp_rtt:
call __dtrace_probe___proc_lwp__start
/*
* If agent lwp, clear %fs and %gs.
*/
movl %gs:CPU_LWP, %eax
movl LWP_PROCP(%eax), %edx
cmpl %eax, P_AGENTTP(%edx)
jne 1f
movl $0, REGOFF_FS(%esp)
movl $0, REGOFF_GS(%esp)
1:
call dtrace_systrace_rtt
movl REGOFF_EDX(%esp), %edx
movl REGOFF_EAX(%esp), %eax
pushl %edx
pushl %eax
call post_syscall /* post_syscall(rval1, rval2) */
addl $8, %esp
movl %cr0, %eax
orl $CR0_TS, %eax
movl %eax, %cr0 /* set up to take fault */
/* on first use of fp */
ALTENTRY(_sys_rtt)
testw $CPL_MASK, REGOFF_CS(%esp) /* test CS for return to user mode */
jz sr_sup
/*
* Return to User.
*/
ALTENTRY(sys_rtt_syscall)
/* check for an AST that's posted to the lwp */
movl %gs:CPU_THREAD, %eax
cli /* disable interrupts */
cmpb $0, T_ASTFLAG(%eax) /* test for signal or AST flag */
jne handle_ast
/*
* Restore user regs before returning to user mode.
*/
ALTENTRY(set_user_regs)
INTR_POP_USER
iret
SET_SIZE(set_user_regs)
handle_ast:
/* Let trap() handle the AST */
sti /* enable interrupts */
movl $T_AST, REGOFF_TRAPNO(%esp) /* set trap type to AST */
movl %esp, %eax
pushl %gs:CPU_ID
pushl $0
pushl %eax
call trap /* trap(rp, 0, cpuid) */
addl $12, %esp
jmp _sys_rtt
/*
* Return to supervisor
*/
ALTENTRY(sr_sup)
/* Check for a kernel preemption request */
cmpb $0, %gs:CPU_KPRUNRUN
jne sr_sup_preempt /* preemption */
/*
* Restore regs before doing iret to kernel mode
*/
set_sup_regs:
cmpw $KGS_SEL, REGOFF_GS(%esp) /* test GS for return to user mode */
jne set_user_regs /* ... (this should not be necessary) */
/*
* If we interrupted the mutex_exit() critical region we must
* reset the PC back to the beginning to prevent missed wakeups.
* See the comments in mutex_exit() for details.
*/
movl REGOFF_PC(%esp), %edx
subl $mutex_exit_critical_start, %edx
cmpl mutex_exit_critical_size, %edx
jae .mutex_exit_check_done
movl $mutex_exit_critical_start, REGOFF_PC(%esp)
/* restart mutex_exit() */
.mutex_exit_check_done:
INTR_POP_KERNEL
iret
sr_sup_preempt:
movl %gs:CPU_THREAD, %ebx
cmpb $0, T_PREEMPT_LK(%ebx) /* Already in kpreempt? */
jne set_sup_regs /* if so, get out of here */
movb $1, T_PREEMPT_LK(%ebx) /* otherwise, set t_preempt_lk */
sti /* it's now safe to unmask interrupts */
pushl $1 /* indicate asynchronous call */
call kpreempt /* kpreempt(1) */
addl $4, %esp
cli /* re-mask interrupts */
movb $0, T_PREEMPT_LK(%ebx) /* clear t_preempt_lk */
jmp set_sup_regs
SET_SIZE(sr_sup)
SET_SIZE(lwp_rtt)
SET_SIZE(lwp_rtt_initial)
SET_SIZE(_sys_rtt)
SET_SIZE(sys_rtt_syscall)
#endif /* __i386 */
#endif /* __lint */
#if defined(__lint)
/*
* So why do we have to deal with all this crud in the world of ia32?
*
* Basically there are four classes of ia32 implementations: those that do not
* have a TSC, those that have a marginal TSC that is broken to the extent
* that it is useless, those that have a marginal TSC that is not quite so
* horribly broken and can be used with some care, and those that have a
* reliable TSC. This crud has to be here in order to sift through all the
* variants.
*/
/*ARGSUSED*/
uint64_t
freq_tsc(uint32_t *pit_counter)
{
return (0);
}
#else /* __lint */
#if defined(__amd64)
/*
* XX64 quick and dirty port from the i386 version. Since we
* believe the amd64 tsc is more reliable, could this code be
* simpler?
*/
ENTRY_NP(freq_tsc)
pushq %rbp
movq %rsp, %rbp
movq %rdi, %r9 /* save pit_counter */
pushq %rbx
pushfq
cli
/ We have a TSC, but we have no way in general to know how reliable it is.
/ Usually a marginal TSC behaves appropriately unless not enough time
/ elapses between reads. A reliable TSC can be read as often and as rapidly
/ as desired. The simplistic approach of reading the TSC counter and
/ correlating to the PIT counter cannot be naively followed. Instead estimates
/ have to be taken to successively refine a guess at the speed of the cpu
/ and then the TSC and PIT counter are correlated. In practice very rarely
/ is more than one quick loop required for an estimate. Measures have to be
/ taken to prevent the PIT counter from wrapping beyond its resolution and for
/ measuring the clock rate of very fast processors.
/
/ The following constant can be tuned. It should be such that the loop does
/ not take too many nor too few PIT counts to execute. If this value is too
/ large, then on slow machines the loop will take a long time, or the PIT
/ counter may even wrap. If this value is too small, then on fast machines
/ the PIT counter may count so few ticks that the resolution of the PIT
/ itself causes a bad guess. Because this code is used in machines with
/ marginal TSC's and/or IO, if this value is too small on those, it may
/ cause the calculated cpu frequency to vary slightly from boot to boot.
/
/ In all cases even if this constant is set inappropriately, the algorithm
/ will still work and the caller should be able to handle variances in the
/ calculation of cpu frequency, but the calculation will be inefficient and
/ take a disproportionate amount of time relative to a well selected value.
/ As the slowest supported cpu becomes faster, this constant should be
/ carefully increased.
movl $0x8000, %ecx
/ to make sure the instruction cache has been warmed
clc
jmp freq_tsc_loop
/ The following block of code up to and including the latching of the PIT
/ counter after freq_tsc_perf_loop is very critical and very carefully
/ written, it should only be modified with great care. freq_tsc_loop to
/ freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
/ freq_tsc_perf_loop up to the unlatching of the PIT counter.
.align 32
freq_tsc_loop:
/ save the loop count in %ebx
movl %ecx, %ebx
/ initialize the PIT counter and start a count down
movb $PIT_LOADMODE, %al
outb $PITCTL_PORT
movb $0xff, %al
outb $PITCTR0_PORT
outb $PITCTR0_PORT
/ read the TSC and store the TS in %edi:%esi
rdtsc
movl %eax, %esi
freq_tsc_perf_loop:
movl %edx, %edi
movl %eax, %esi
movl %edx, %edi
loop freq_tsc_perf_loop
/ read the TSC and store the LSW in %ecx
rdtsc
movl %eax, %ecx
/ latch the PIT counter and status
movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al
outb $PITCTL_PORT
/ remember if the icache has been warmed
setc %ah
/ read the PIT status
inb $PITCTR0_PORT
shll $8, %eax
/ read PIT count
inb $PITCTR0_PORT
shll $8, %eax
inb $PITCTR0_PORT
bswap %eax
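/ %eax now holds, from most to least significant byte: count MSB,
/ count LSB, PIT status, icache-warm flag. The btw tests below index
/ into the status byte (hence the +8), and the roll $16 at
/ freq_tsc_pit_did_not_wrap rotates the 16-bit count into %ax, leaving
/ the warm flag at bit 16.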
/ check to see if the PIT count was loaded into the CE
btw $_CONST(PITSTAT_NULLCNT+8), %ax
jc freq_tsc_increase_count
/ check to see if PIT counter wrapped
btw $_CONST(PITSTAT_OUTPUT+8), %ax
jnc freq_tsc_pit_did_not_wrap
/ halve count
shrl $1, %ebx
movl %ebx, %ecx
/ the instruction cache has been warmed
stc
jmp freq_tsc_loop
freq_tsc_increase_count:
shll $1, %ebx
jc freq_tsc_too_fast
movl %ebx, %ecx
/ the instruction cache has been warmed
stc
jmp freq_tsc_loop
freq_tsc_pit_did_not_wrap:
roll $16, %eax
cmpw $0x2000, %ax
notw %ax
jb freq_tsc_sufficient_duration
freq_tsc_calculate:
/ in mode 0, the PIT loads the count into the CE on the first CLK pulse,
/ then on the second CLK pulse the CE is decremented, therefore mode 0
/ is really a (count + 1) counter, ugh
xorl %esi, %esi
movw %ax, %si
incl %esi
movl $0xf000, %eax
mull %ebx
/ tuck away (target_pit_count * loop_count)
movl %edx, %ecx
movl %eax, %ebx
movl %esi, %eax
movl $0xffffffff, %edx
mull %edx
addl %esi, %eax
adcl $0, %edx
cmpl %ecx, %edx
ja freq_tsc_div_safe
jb freq_tsc_too_fast
cmpl %ebx, %eax
jbe freq_tsc_too_fast
freq_tsc_div_safe:
movl %ecx, %edx
movl %ebx, %eax
movl %esi, %ecx
divl %ecx
movl %eax, %ecx
/ the instruction cache has been warmed
stc
jmp freq_tsc_loop
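/ In effect, freq_tsc_calculate through the divl above computes
/	new_loop_count = (0xf000 * loop_count) / elapsed_pit_ticks
/ rescaling the loop count so the next attempt consumes roughly 0xf000
/ PIT ticks. The 64-bit compare beforehand checks that
/ (elapsed_pit_ticks << 32) exceeds the product, i.e. that the 64/32
/ divl cannot overflow; if it would, the cpu is treated as too fast.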
freq_tsc_sufficient_duration:
/ test to see if the icache has been warmed
btl $16, %eax
jnc freq_tsc_calculate
/ recall mode 0 is a (count + 1) counter
andl $0xffff, %eax
incl %eax
/ save the number of PIT counts
movl %eax, (%r9)
/ calculate the number of TS's that elapsed
movl %ecx, %eax
subl %esi, %eax
sbbl %edi, %edx
jmp freq_tsc_end
freq_tsc_too_fast:
/ return 0 as a 64 bit quantity
xorl %eax, %eax
xorl %edx, %edx
freq_tsc_end:
shlq $32, %rdx
orq %rdx, %rax
/ restore state of the interrupt flag
popfq
popq %rbx
leaveq
ret
SET_SIZE(freq_tsc)
#elif defined(__i386)
ENTRY_NP(freq_tsc)
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
pushfl
cli
/ We have a TSC, but we have no way in general to know how reliable it is.
/ Usually a marginal TSC behaves appropriately unless not enough time
/ elapses between reads. A reliable TSC can be read as often and as rapidly
/ as desired. The simplistic approach of reading the TSC counter and
/ correlating to the PIT counter cannot be naively followed. Instead estimates
/ have to be taken to successively refine a guess at the speed of the cpu
/ and then the TSC and PIT counter are correlated. In practice very rarely
/ is more than one quick loop required for an estimate. Measures have to be
/ taken to prevent the PIT counter from wrapping beyond its resolution and for
/ measuring the clock rate of very fast processors.
/
/ The following constant can be tuned. It should be such that the loop does
/ not take too many nor too few PIT counts to execute. If this value is too
/ large, then on slow machines the loop will take a long time, or the PIT
/ counter may even wrap. If this value is too small, then on fast machines
/ the PIT counter may count so few ticks that the resolution of the PIT
/ itself causes a bad guess. Because this code is used in machines with
/ marginal TSC's and/or IO, if this value is too small on those, it may
/ cause the calculated cpu frequency to vary slightly from boot to boot.
/
/ In all cases even if this constant is set inappropriately, the algorithm
/ will still work and the caller should be able to handle variances in the
/ calculation of cpu frequency, but the calculation will be inefficient and
/ take a disproportionate amount of time relative to a well selected value.
/ As the slowest supported cpu becomes faster, this constant should be
/ carefully increased.
movl $0x8000, %ecx
/ to make sure the instruction cache has been warmed
clc
jmp freq_tsc_loop
/ The following block of code up to and including the latching of the PIT
/ counter after freq_tsc_perf_loop is very critical and very carefully
/ written, it should only be modified with great care. freq_tsc_loop to
/ freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
/ freq_tsc_perf_loop up to the unlatching of the PIT counter.
.align 32
freq_tsc_loop:
/ save the loop count in %ebx
movl %ecx, %ebx
/ initialize the PIT counter and start a count down
movb $PIT_LOADMODE, %al
outb $PITCTL_PORT
movb $0xff, %al
outb $PITCTR0_PORT
outb $PITCTR0_PORT
/ read the TSC and store the TS in %edi:%esi
rdtsc
movl %eax, %esi
freq_tsc_perf_loop:
movl %edx, %edi
movl %eax, %esi
movl %edx, %edi
loop freq_tsc_perf_loop
/ read the TSC and store the LSW in %ecx
rdtsc
movl %eax, %ecx
/ latch the PIT counter and status
movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al
outb $PITCTL_PORT
/ remember if the icache has been warmed
setc %ah
/ read the PIT status
inb $PITCTR0_PORT
shll $8, %eax
/ read PIT count
inb $PITCTR0_PORT
shll $8, %eax
inb $PITCTR0_PORT
bswap %eax
/ check to see if the PIT count was loaded into the CE
btw $_CONST(PITSTAT_NULLCNT+8), %ax
jc freq_tsc_increase_count
/ check to see if PIT counter wrapped
btw $_CONST(PITSTAT_OUTPUT+8), %ax
jnc freq_tsc_pit_did_not_wrap
/ halve count
shrl $1, %ebx
movl %ebx, %ecx
/ the instruction cache has been warmed
stc
jmp freq_tsc_loop
freq_tsc_increase_count:
shll $1, %ebx
jc freq_tsc_too_fast
movl %ebx, %ecx
/ the instruction cache has been warmed
stc
jmp freq_tsc_loop
freq_tsc_pit_did_not_wrap:
roll $16, %eax
cmpw $0x2000, %ax
notw %ax
jb freq_tsc_sufficient_duration
freq_tsc_calculate:
/ in mode 0, the PIT loads the count into the CE on the first CLK pulse,
/ then on the second CLK pulse the CE is decremented, therefore mode 0
/ is really a (count + 1) counter, ugh
xorl %esi, %esi
movw %ax, %si
incl %esi
movl $0xf000, %eax
mull %ebx
/ tuck away (target_pit_count * loop_count)
movl %edx, %ecx
movl %eax, %ebx
movl %esi, %eax
movl $0xffffffff, %edx
mull %edx
addl %esi, %eax
adcl $0, %edx
cmpl %ecx, %edx
ja freq_tsc_div_safe
jb freq_tsc_too_fast
cmpl %ebx, %eax
jbe freq_tsc_too_fast
freq_tsc_div_safe:
movl %ecx, %edx
movl %ebx, %eax
movl %esi, %ecx
divl %ecx
movl %eax, %ecx
/ the instruction cache has been warmed
stc
jmp freq_tsc_loop
freq_tsc_sufficient_duration:
/ test to see if the icache has been warmed
btl $16, %eax
jnc freq_tsc_calculate
/ recall mode 0 is a (count + 1) counter
andl $0xffff, %eax
incl %eax
/ save the number of PIT counts
movl 8(%ebp), %ebx
movl %eax, (%ebx)
/ calculate the number of TS's that elapsed
movl %ecx, %eax
subl %esi, %eax
sbbl %edi, %edx
jmp freq_tsc_end
freq_tsc_too_fast:
/ return 0 as a 64 bit quantity
xorl %eax, %eax
xorl %edx, %edx
freq_tsc_end:
/ restore state of the interrupt flag
popfl
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
SET_SIZE(freq_tsc)
#endif /* __i386 */
#endif /* __lint */
#if !defined(__amd64)
#if defined(__lint)
/*
* We do not have a TSC, so we use a block of instructions with well-known
* timings.
*/
/*ARGSUSED*/
uint64_t
freq_notsc(uint32_t *pit_counter)
{
return (0);
}
#else /* __lint */
ENTRY_NP(freq_notsc)
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
pushfl
cli
/ initial count for the idivl loop
movl $0x1000, %ecx
/ load the divisor
movl $1, %ebx
jmp freq_notsc_loop
.align 16
freq_notsc_loop:
/ set high 32 bits of dividend to zero
xorl %edx, %edx
/ save the loop count in %edi
movl %ecx, %edi
/ initialize the PIT counter and start a count down
movb $PIT_LOADMODE, %al
outb $PITCTL_PORT
movb $0xff, %al
outb $PITCTR0_PORT
outb $PITCTR0_PORT
/ set low 32 bits of dividend to zero
xorl %eax, %eax
/ It is vital that the arguments to idivl be set appropriately because on some
/ cpu's this instruction takes more or less clock ticks depending on its
/ arguments.
freq_notsc_perf_loop:
idivl %ebx
idivl %ebx
idivl %ebx
idivl %ebx
idivl %ebx
loop freq_notsc_perf_loop
/ latch the PIT counter and status
movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al
outb $PITCTL_PORT
/ read the PIT status
inb $PITCTR0_PORT
shll $8, %eax
/ read PIT count
inb $PITCTR0_PORT
shll $8, %eax
inb $PITCTR0_PORT
bswap %eax
/ check to see if the PIT count was loaded into the CE
btw $_CONST(PITSTAT_NULLCNT+8), %ax
jc freq_notsc_increase_count
/ check to see if PIT counter wrapped
btw $_CONST(PITSTAT_OUTPUT+8), %ax
jnc freq_notsc_pit_did_not_wrap
/ halve count
shrl $1, %edi
movl %edi, %ecx
jmp freq_notsc_loop
freq_notsc_increase_count:
shll $1, %edi
jc freq_notsc_too_fast
movl %edi, %ecx
jmp freq_notsc_loop
freq_notsc_pit_did_not_wrap:
shrl $16, %eax
cmpw $0x2000, %ax
notw %ax
jb freq_notsc_sufficient_duration
freq_notsc_calculate:
/ in mode 0, the PIT loads the count into the CE on the first CLK pulse,
/ then on the second CLK pulse the CE is decremented, therefore mode 0
/ is really a (count + 1) counter, ugh
xorl %esi, %esi
movw %ax, %si
incl %esi
movl %edi, %eax
movl $0xf000, %ecx
mull %ecx
/ tuck away (target_pit_count * loop_count)
movl %edx, %edi
movl %eax, %ecx
movl %esi, %eax
movl $0xffffffff, %edx
mull %edx
addl %esi, %eax
adcl $0, %edx
cmpl %edi, %edx
ja freq_notsc_div_safe
jb freq_notsc_too_fast
cmpl %ecx, %eax
jbe freq_notsc_too_fast
freq_notsc_div_safe:
movl %edi, %edx
movl %ecx, %eax
movl %esi, %ecx
divl %ecx
movl %eax, %ecx
jmp freq_notsc_loop
freq_notsc_sufficient_duration:
/ recall mode 0 is a (count + 1) counter
incl %eax
/ save the number of PIT counts
movl 8(%ebp), %ebx
movl %eax, (%ebx)
/ calculate the number of cpu clock ticks that elapsed
cmpl $X86_VENDOR_Cyrix, x86_vendor
jnz freq_notsc_notcyrix / fall through for Cyrix timing
/ freq_notsc_perf_loop takes 86 clock cycles on Cyrix 6x86 cores
movl $86, %eax
jmp freq_notsc_calculate_tsc
freq_notsc_notcyrix:
/ freq_notsc_perf_loop takes 237 clock cycles on Intel Pentiums
movl $237, %eax
freq_notsc_calculate_tsc:
mull %edi
jmp freq_notsc_end
freq_notsc_too_fast:
/ return 0 as a 64 bit quantity
xorl %eax, %eax
xorl %edx, %edx
freq_notsc_end:
/ restore state of the interrupt flag
popfl
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
SET_SIZE(freq_notsc)
/
/ setup_121_andcall(funcp, argument)
/
/ invokes funcp(argument) after having setup one to one mapping
/ for the virtual address funcp to its physical address.
/ This function cannot be invoked with PAE enabled; it could
/ be used to enable PAE.
/
ENTRY_NP(setup_121_andcall)
pushl %ebp
movl %esp, %ebp
pushl %ebx
movl 8(%ebp), %ecx / funcp
movl %ecx, %edx
shrl $MMU_STD_PAGESHIFT, %ecx
andl $MMU_L2_MASK, %ecx / pagetable index
movl %cr3, %eax
andl $MMU_STD_PAGEMASK, %eax / pagedirectory pointer
pushl %eax
shrl $MMU_STD_PAGESHIFT+NPTESHIFT, %edx
movl (%eax, %edx, 4), %eax / funcp's pagetable
testl $PTE_LARGEPAGE, %eax
je not_a_large_page
shrl $MMU_STD_PAGESHIFT, %eax
addl %eax, %ecx
movl %ecx, %edx
shll $MMU_STD_PAGESHIFT, %edx
orl $_CONST(PTE_SRWX|PTE_VALID), %edx
jmp got_pfn
not_a_large_page:
andl $MMU_STD_PAGEMASK, %eax / ignore ref and other bits
movl (%eax, %ecx, 4), %ecx / pte entry
movl %ecx, %edx
shrl $MMU_STD_PAGESHIFT, %ecx / pfnum of funcp
got_pfn:
movl %ecx, %ebx
andl $MMU_L2_MASK, %ecx
shrl $NPTESHIFT, %ebx
movl 0(%esp), %eax
leal (%eax, %ebx, 4), %eax / new pde entry for the function
pushl %eax
movl (%eax), %eax
testl $PTE_VALID, %eax / is the pde entry valid
jne pagetable_isvalid
movl $one2one_pagetable, %ebx
addl $MMU_STD_PAGESIZE, %ebx / move to next 4k
shrl $MMU_STD_PAGESHIFT+NPTESHIFT, %ebx
movl 4(%esp), %eax
movl (%eax, %ebx, 4), %eax
testl $PTE_LARGEPAGE, %eax
je not_a_large_page1
shrl $MMU_STD_PAGESHIFT, %eax
movl $one2one_pagetable, %ebx
addl $MMU_STD_PAGESIZE, %ebx / move to next 4k
shrl $MMU_STD_PAGESHIFT, %ebx
andl $MMU_L2_MASK, %ebx
addl %ebx, %eax
shll $MMU_STD_PAGESHIFT, %eax
orl $_CONST(PTE_SRWX|PTE_VALID), %eax
jmp got_pfn1
not_a_large_page1:
andl $MMU_STD_PAGEMASK, %eax / ignore ref and other bits
movl $one2one_pagetable, %ebx
addl $MMU_STD_PAGESIZE, %ebx / move to next 4k
shrl $MMU_STD_PAGESHIFT, %ebx
andl $MMU_L2_MASK, %ebx
movl (%eax, %ebx, 4), %eax / pte entry for one2one_pagetable
got_pfn1:
movl 0(%esp), %ebx
movl %eax, (%ebx) / point pde entry at one2one_pagetable
movl $one2one_pagetable, %eax / load pagetable ptr
addl $MMU_STD_PAGESIZE, %eax / 4K aligned virtual address
andl $MMU_STD_PAGEMASK, %eax
movl $0, 4(%esp) / previous pde is invalid
jmp pagetable_madevalid
pagetable_isvalid:
movl %eax, 4(%esp) / save pde
pagetable_madevalid:
andl $MMU_STD_PAGEMASK, %eax / ignore lower 12 bits
pushl (%eax, %ecx, 4) / save the old pagetable entry that
/ belongs to boot
movl %edx, (%eax, %ecx, 4) / We now have a 1-1 mapping for the
/ function funcp
leal (%eax, %ecx, 4), %eax
pushl %eax / save the address of pagetable entry
/ we patched above
movl 8(%ebp), %eax / funcp
andl $MMU_PAGEOFFSET, %eax / pick the 4k offset
andl $MMU_STD_PAGEMASK, %edx
addl %edx, %eax
movl 12(%ebp), %ebx / argument
movl %cr3, %ecx
movl %ecx, %cr3
call *%eax / jump to funcp(%eax)
/ we have 1-1 mapping
/ for this particular page, ret address
/ still points to space in 0xe00xxxxx
testl %eax, %eax
jne cr3_changed
popl %eax / pop the pagetable entry address
popl %ecx / pop the pagetable entry
movl %ecx, (%eax) / restore the pte that we destroyed
popl %eax / pop the pagedirectory address
popl %ecx / pop pde
movl %ecx, (%eax) / restore pde
movl %cr3, %eax
movl %eax, %cr3
popl %ebx
popl %ebp
ret
cr3_changed:
addl $16, %esp / we had saved 4 arguments
popl %ebx
popl %ebp
ret
SET_SIZE(setup_121_andcall)
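/
/ An illustrative C-level view of the routine above (a sketch; the
/ prototype is an assumption, not declared in this file):
/
/	extern uint32_t setup_121_andcall(uint32_t (*funcp)(uint32_t),
/	    uint32_t arg);
/
/ Note that the argument is handed to funcp in %ebx rather than on the
/ stack, which is why enable_pae above reads its argument from %ebx
/ instead of using the normal C calling convention (see the XXX there).
/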
#endif /* __lint */
#endif /* !__amd64 */
#if !defined(__lint)
.data
#if !defined(__amd64)
.align 4
cpu_vendor:
.long 0, 0, 0 /* Vendor ID string returned */
.globl CyrixInstead
.globl x86_feature
.globl x86_type
.globl x86_vendor
#endif
.globl cr4_value
/*
* Pagetable that would allow a one-to-one mapping of the
* first 4K of kernel text. We allocate 8K so that we are
* guaranteed to have a 4K pagetable aligned on a 4K boundary
* somewhere within it.
*/
.comm one2one_pagetable, 8192
#endif /* __lint */