rtld/amd64/boot_elf.s

	boot_elf.s revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 *  Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 *  Use is subject to license terms.
 */
#pragma ident   "%Z%%M% %I% %E% SMI"

#if defined(lint)

#include    <sys/types.h>
#include    <_rtld.h>
#include    <_audit.h>
#include    <_elf.h>
#include    <sys/regset.h>

/*
 * Lint-only stand-in for elf_plt_trace.  This definition is compiled only
 * when `lint' is defined; the real routine is the assembly implementation
 * in the #else branch.  The stub does nothing and returns 0.
 */
/* ARGSUSED0 */
int
elf_plt_trace()
{
    return (0);
}
#else

#include    <link.h>
#include    <_audit.h>
#include    <sys/asm_linkage.h>

    .file   "boot_elf.s"
    .text

/*
 * On entry the 'glue code' has already  done the following:
 *
 *  pushq   %rbp
 *  movq    %rsp, %rbp
 *  subq    $0x10, %rsp
 *  leaq    trace_fields(%rip), %r11
 *  movq    %r11, -0x8(%rbp)
 *  movq    $elf_plt_trace, %r11
 *  jmp *%r11
 *
 * so - -8(%rbp) contains the dyndata ptr
 *
 *  0x0 Addr        *reflmp
 *  0x8 Addr        *deflmp
 *  0x10    Word        symndx
 *  0x14    Word        sb_flags
 *  0x18    Sym     symdef.st_name
 *  0x1c            symdef.st_info
 *  0x1d            symdef.st_other
 *  0x1e            symdef.st_shndx
 *  0x20            symdef.st_value
 *  0x28            symdef.st_size
 *
 * Also note - on entry 16 bytes have already been subtracted
 * from the %rsp.  The first 8 bytes are for the dyn_data_ptr,
 * the second 8 bytes are there to align the stack and are available
 * for use.
 */
/*
 * Byte offsets of the fields inside the dyndata block whose address the
 * glue code stored at -8(%rbp).  SYMDEF_OFF is the start of the embedded
 * Sym (its st_name field); SYMDEF_VALUE_OFF is that Sym's st_value field.
 */
#define REFLMP_OFF      0x0
#define DEFLMP_OFF      0x8
#define SYMNDX_OFF      0x10
#define SBFLAGS_OFF     0x14
#define SYMDEF_OFF      0x18
#define SYMDEF_VALUE_OFF    0x20
/*
 * Local stack space storage for elf_plt_trace is allocated
 * as follows:
 *
 *  First - before we got here - %rsp has been decremented
 *  by 0x10 to make space for the dyndata ptr (and another
 *  free word).  In addition to that, we create space
 *  for the following:
 *
 *  La_amd64_regs       8 * 8:  64
 *  prev_stack_size     8        8
 *  Saved regs:
 *      %rdi             8
 *      %rsi             8
 *      %rdx             8
 *      %rcx             8
 *      %r8              8
 *      %r9              8
 *      %r10             8
 *      %r11             8
 *      %rax             8
 *                  =======
 *              Total:  144 (16byte aligned)
 *
 *  So - we subtract 144 from %rsp, which gives the following frame layout
 *
 *  -8(%rbp)    store dyndata ptr
 *  -16(%rbp)   store call destination
 *  -80(%rbp)   space for La_amd64_regs
 *  -88(%rbp)   prev stack size
 *  -96(%rbp)   entering %rdi
 *  -104(%rbp)  entering %rsi
 *  -112(%rbp)  entering %rdx
 *  -120(%rbp)  entering %rcx
 *  -128(%rbp)  entering %r8
 *  -136(%rbp)  entering %r9
 *  -144(%rbp)  entering %r10
 *  -152(%rbp)  entering %r11
 *  -160(%rbp)  entering %rax
 *
 */
/*
 * %rbp-relative offsets of the local slots used by elf_plt_trace.
 * SPLAREGOFF is the base (lowest address) of the 64-byte La_amd64_regs
 * area; every other offset names a single 8-byte slot.
 */
#define SPDYNOFF    -8
#define SPDESTOFF   -16
#define SPLAREGOFF  -80
#define SPPRVSTKOFF -88
#define SPRDIOFF    -96
#define SPRSIOFF    -104
#define SPRDXOFF    -112
#define SPRCXOFF    -120
#define SPR8OFF     -128
#define SPR9OFF     -136
#define SPR10OFF    -144
#define SPR11OFF    -152
#define SPRAXOFF    -160

    .globl  elf_plt_trace
    .type   elf_plt_trace,@function
    .align 16
/*
 * elf_plt_trace - PLT auditing trampoline (AT&T syntax, SysV AMD64 ABI).
 *
 * Entry state (established by the per-symbol glue code, which jumps here):
 *  %rbp        frame pointer; 0(%rbp) = previous %rbp,
 *              8(%rbp) = return address of the original caller
 *  -8(%rbp)    pointer to the dyndata block for this PLT entry
 *  %rsp        %rbp - 16
 *  %rdi,%rsi,%rdx,%rcx,%r8,%r9,%r10,%r11,%rax
 *              still hold the values the original caller set up
 *
 * Flow:
 *  1. Save the incoming integer registers in the local frame.
 *  2. Unless LA_SYMB_NOPLTENTER is set in sb_flags, fill in a
 *     La_amd64_regs and call audit_pltenter(); its return value is the
 *     destination address.  Otherwise the destination is symdef.st_value.
 *  3. If AF_PLTEXIT is clear in audit_flags, or LA_SYMB_NOPLTEXIT is set
 *     for this symbol, discard our frame and "ret" into the destination.
 *  4. Otherwise duplicate the caller's stack arguments below our frame,
 *     call the destination, pass its %rax to audit_pltexit(), and return
 *     the value audit_pltexit() hands back.
 *
 * %rsp is 16-byte aligned at every call made from here, provided the
 * original caller obeyed the ABI (%rbp is then 16-byte aligned and every
 * adjustment below is a multiple of 16).
 *
 * NOTE(review): %xmm0-%xmm7 are not saved anywhere in this routine, so
 * floating point arguments survive only if audit_pltenter() and the code
 * it calls leave those registers alone - confirm.  The same applies to an
 * %xmm0 return value across audit_pltexit().
 */
elf_plt_trace:
    subq    $144,%rsp   / create some local storage
    movq    %rdi, SPRDIOFF(%rbp)
    movq    %rsi, SPRSIOFF(%rbp)
    movq    %rdx, SPRDXOFF(%rbp)
    movq    %rcx, SPRCXOFF(%rbp)
    movq    %r8, SPR8OFF(%rbp)
    movq    %r9, SPR9OFF(%rbp)
    movq    %r10, SPR10OFF(%rbp)
    movq    %r11, SPR11OFF(%rbp)
    movq    %rax, SPRAXOFF(%rbp)

    movq    SPDYNOFF(%rbp), %rax            / %rax = dyndata
    testb   $LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax)  / <link.h>
    je  .start_pltenter
    /*
     * pltenter is suppressed for this symbol: the destination is simply
     * the value of the symbol definition.
     */
    movq    SYMDEF_VALUE_OFF(%rax), %rdi
    movq    %rdi, SPDESTOFF(%rbp)       / save destination address
    jmp .end_pltenter

.start_pltenter:
    /*
     * save all registers into La_amd64_regs
     */
    leaq    SPLAREGOFF(%rbp), %rsi  / %rsi = &La_amd64_regs
    leaq    8(%rbp), %rdi       / address of the return address slot
    movq    %rdi, 0(%rsi)       / la_rsp
    movq    0(%rbp), %rdi
    movq    %rdi, 8(%rsi)       / la_rbp
    movq    SPRDIOFF(%rbp), %rdi
    movq    %rdi, 16(%rsi)      / la_rdi
    movq    SPRSIOFF(%rbp), %rdi
    movq    %rdi, 24(%rsi)      / la_rsi
    movq    SPRDXOFF(%rbp), %rdi
    movq    %rdi, 32(%rsi)      / la_rdx
    movq    SPRCXOFF(%rbp), %rdi
    movq    %rdi, 40(%rsi)      / la_rcx
    movq    SPR8OFF(%rbp), %rdi
    movq    %rdi, 48(%rsi)      / la_r8
    movq    SPR9OFF(%rbp), %rdi
    movq    %rdi, 56(%rsi)      / la_r9

    /*
     * prepare for call to la_pltenter
     */
    movq    SPDYNOFF(%rbp), %r11        / %r11 = &dyndata
    leaq    SBFLAGS_OFF(%r11), %r9      / arg6 (&sb_flags)
    leaq    SPLAREGOFF(%rbp), %r8       / arg5 (&La_amd64_regs)
    movl    SYMNDX_OFF(%r11), %ecx      / arg4 (symndx)
    leaq    SYMDEF_OFF(%r11), %rdx      / arg3 (&Sym)
    movq    DEFLMP_OFF(%r11), %rsi      / arg2 (dlmp)
    movq    REFLMP_OFF(%r11), %rdi      / arg1 (rlmp)
    call    audit_pltenter@PLT
    movq    %rax, SPDESTOFF(%rbp)       / save calling address
.end_pltenter:

    /*
     * If *no* la_pltexit() routines exist
     * we do not need to keep the stack frame
     * before we call the actual routine.  Instead we
     * jump to it and remove our stack from the stack
     * at the same time.
     */
    movl    audit_flags(%rip), %eax
    andl    $AF_PLTEXIT, %eax       / value of audit.h:AF_PLTEXIT
    cmpl    $0, %eax
    je  .bypass_pltexit
    /*
     * Has the *nopltexit* flag been set for this entry point
     */
    movq    SPDYNOFF(%rbp), %r11        / %r11 = &dyndata
    testb   $LA_SYMB_NOPLTEXIT, SBFLAGS_OFF(%r11)
    je  .start_pltexit

.bypass_pltexit:
    /*
     * No PLTEXIT processing required.
     *
     * Rewrite the two slots at the top of our frame so that a
     * "popq %rbp ; ret" both restores the caller's %rbp and transfers
     * control to the destination, leaving the original return address
     * on top of the stack exactly as a direct call would have.
     */
    movq    0(%rbp), %r11
    movq    %r11, -8(%rbp)          / move prev %rbp
    movq    SPDESTOFF(%rbp), %r11       / r11 == calling destination
    movq    %r11, 0(%rbp)           / store destination at top

    /
    / Restore registers
    /
    movq    SPRDIOFF(%rbp), %rdi
    movq    SPRSIOFF(%rbp), %rsi
    movq    SPRDXOFF(%rbp), %rdx
    movq    SPRCXOFF(%rbp), %rcx
    movq    SPR8OFF(%rbp), %r8
    movq    SPR9OFF(%rbp), %r9
    movq    SPR10OFF(%rbp), %r10
    movq    SPR11OFF(%rbp), %r11
    movq    SPRAXOFF(%rbp), %rax

    subq    $8, %rbp            / adjust %rbp for 'ret'
    movq    %rbp, %rsp          /
    /*
     * At this point, after a little doctoring, we should
     * have the following on the stack:
     *
     *  16(%rsp):  ret addr
     *  8(%rsp):  dest_addr
     *  0(%rsp):  Previous %rbp
     *
     * So - we pop the previous %rbp, and then
     * ret to our final destination.
     */
    popq    %rbp                /
    ret                 / jmp to final destination
                        / and clean up stack :)

.start_pltexit:
    /*
     * In order to call the destination procedure and then return
     * to audit_pltexit() for post analysis we must first grow
     * our stack frame and then duplicate the original callers
     * stack state.  This duplicates all of the arguments
     * that were to be passed to the destination procedure.
     */
    movq    %rbp, %rdi          /
    addq    $16, %rdi           /    %rdi = src
    movq    (%rbp), %rdx            /
    subq    %rdi, %rdx          /    %rdx == prev frame sz
    /*
     * If audit_argcnt > 0 then we limit the number of
     * arguments that will be duplicated to audit_argcnt.
     *
     * If (prev_stack_size > (audit_argcnt * 8))
     *  prev_stack_size = audit_argcnt * 8;
     */
    movl    audit_argcnt(%rip),%eax     /   %eax = audit_argcnt
    cmpl    $0, %eax
    jle .grow_stack
    leaq    (,%rax,8), %rax         /    %rax = audit_argcnt * 8
    cmpq    %rax,%rdx
    jle .grow_stack
    movq    %rax, %rdx
    /*
     * Grow the stack and duplicate the arguments of the
     * original caller.
     *
     * The size is first rounded up to a multiple of 16 so that %rsp
     * remains 16-byte aligned at the call below, even when audit_argcnt
     * is odd.  The extra 8 bytes, when present, sit above the copied
     * arguments and are never interpreted by the callee.
     */
.grow_stack:
    addq    $15, %rdx           /
    andq    $-16, %rdx          /    round size up to 16 bytes
    subq    %rdx, %rsp          /    grow the stack
    movq    %rdx, SPPRVSTKOFF(%rbp)     /    -88(%rbp) == dupped size
    movq    %rsp, %rcx          /    %rcx = dest
    addq    %rcx, %rdx          /    %rdx == tail of dest
.while_base:
    cmpq    %rdx, %rcx          /   while (dest < tail) {
    jge .end_while          /
    movq    (%rdi), %rsi
    movq    %rsi,(%rcx)         /        *dest = *src
    addq    $8, %rdi            /    src++
    addq    $8, %rcx            /        dest++
    jmp .while_base         /    }

    /*
     * The stack below our frame now holds a copy of the stack
     * arguments of the original calling procedure.
     */
.end_while:
    /
    / Restore registers
    /
    movq    SPRDIOFF(%rbp), %rdi
    movq    SPRSIOFF(%rbp), %rsi
    movq    SPRDXOFF(%rbp), %rdx
    movq    SPRCXOFF(%rbp), %rcx
    movq    SPR8OFF(%rbp), %r8
    movq    SPR9OFF(%rbp), %r9
    movq    SPR10OFF(%rbp), %r10
    movq    SPR11OFF(%rbp), %r11
    movq    SPRAXOFF(%rbp), %rax

    /*
     * Call to destination function - we'll return here
     * for pltexit monitoring.
     */
    call    *SPDESTOFF(%rbp)

    addq    SPPRVSTKOFF(%rbp), %rsp / cleanup dupped stack

    /*
     * %rdx carries the upper half of a 16-byte integer return value.
     * The entering %rdx is dead by now, so reuse its slot to carry the
     * returned %rdx across the call to audit_pltexit(); the register
     * restore below then hands it back to the caller intact.
     */
    movq    %rdx, SPRDXOFF(%rbp)

    /
    / prepare for call to audit_pltexit()
    /
    movq    SPDYNOFF(%rbp), %r11        / %r11 = &dyndata
    movl    SYMNDX_OFF(%r11), %r8d      / arg5 (symndx) - 32-bit Word
    leaq    SYMDEF_OFF(%r11), %rcx      / arg4 (&Sym)
    movq    DEFLMP_OFF(%r11), %rdx      / arg3 (dlmp)
    movq    REFLMP_OFF(%r11), %rsi      / arg2 (rlmp)
    movq    %rax, %rdi          / arg1 (returnval)
    call    audit_pltexit@PLT

    /*
     * Clean up after ourselves and return to the
     * original calling procedure.
     */

    /
    / Restore registers (%rdx = value returned by the destination)
    /
    movq    SPRDIOFF(%rbp), %rdi
    movq    SPRSIOFF(%rbp), %rsi
    movq    SPRDXOFF(%rbp), %rdx
    movq    SPRCXOFF(%rbp), %rcx
    movq    SPR8OFF(%rbp), %r8
    movq    SPR9OFF(%rbp), %r9
    movq    SPR10OFF(%rbp), %r10
    movq    SPR11OFF(%rbp), %r11
    / %rax already holds the value returned by audit_pltexit

    movq    %rbp, %rsp          /
    popq    %rbp                /
    ret                 / return to caller
    .size   elf_plt_trace, .-elf_plt_trace
#endif

/*
 * We got here because a call to a function resolved to a procedure
 * linkage table entry.  That entry did a jmp to the first PLT entry, which
 * in turn did a jmp to elf_rtbndr.
 *
 * the code sequence that got us here was:
 *
 * .PLT0:
 *  pushq   GOT+8(%rip) #GOT[1]
 *  jmp *GOT+16(%rip)   #GOT[2]
 *  nop
 *  nop
 *  nop
 *  nop
 *  ...
 * PLT entry for foo:
 *  jmp *name1@GOTPCREL(%rip)
 *  pushq   $rel.plt.foo
 *  jmp PLT0
 *
 * At entry, the stack looks like this:
 *
 *  return address          16(%rsp)
 *  $rel.plt.foo    (plt index) 8(%rsp)
 *  lmp             0(%rsp)
 *
 */
#if defined(lint)

/*
 * Lint-only stand-in for elf_rtbndr.  This definition is compiled only
 * when `lint' is defined; the real routine is the assembly implementation
 * in the #else branch.  The stub forwards its three arguments to
 * elf_bndr() and discards the result.
 */
extern unsigned long    elf_bndr(Rt_map *, unsigned long, caddr_t);

void
elf_rtbndr(Rt_map * lmp, unsigned long reloc, caddr_t pc)
{
    (void) elf_bndr(lmp, reloc, pc);
}

#else
    .weak   _elf_rtbndr
    _elf_rtbndr = elf_rtbndr

/*
 * elf_rtbndr - lazy binding entry point (AT&T syntax, SysV AMD64 ABI).
 *
 * Reached by a jmp from PLT0 with, on the stack:
 *
 *  16(%rsp)    return address of the original caller
 *   8(%rsp)    relocation (plt) index pushed by the PLT entry
 *   0(%rsp)    lmp pushed by PLT0
 *
 * Every register that can carry an argument to the function being bound
 * is preserved across the call to elf_bndr(): %rax (vector register
 * count for varargs calls), %rdi, %rsi, %rdx, %rcx, %r8, %r9, %r10 and
 * %xmm0-%xmm7.  elf_bndr() is ordinary C code and is free to clobber all
 * of them, so without the %xmm save a lazily bound function taking
 * floating point arguments could see corrupted values on its first call.
 *
 * The address returned by elf_bndr() overwrites the relocation index
 * slot; after the lmp slot is popped, "ret" consumes it as a jump to the
 * resolved function with the caller's return address on top of the stack.
 *
 * %rsp is 16-byte aligned at the call to elf_bndr() provided the original
 * caller obeyed the ABI: return address + two PLT pushes + %rbp + eight
 * register pushes + RTB_XMM_SAVE_SIZE is a multiple of 16.  The %xmm
 * registers are stored with movdqu so the save area itself makes no
 * alignment assumption.
 */
#define RTB_XMM_SAVE_SIZE   128     /* 8 registers * 16 bytes */

    ENTRY(elf_rtbndr)

    pushq   %rbp
    movq    %rsp, %rbp

    pushq   %rax        /* for SSE register count */
    pushq   %rdi        /* arg 0 .. */
    pushq   %rsi
    pushq   %rdx
    pushq   %rcx
    pushq   %r8
    pushq   %r9     /* .. arg 5 */
    pushq   %r10        /* call chain reg */

    subq    $RTB_XMM_SAVE_SIZE, %rsp    /* room for %xmm0 - %xmm7 */
    movdqu  %xmm0, 0x00(%rsp)
    movdqu  %xmm1, 0x10(%rsp)
    movdqu  %xmm2, 0x20(%rsp)
    movdqu  %xmm3, 0x30(%rsp)
    movdqu  %xmm4, 0x40(%rsp)
    movdqu  %xmm5, 0x50(%rsp)
    movdqu  %xmm6, 0x60(%rsp)
    movdqu  %xmm7, 0x70(%rsp)

    movq    8(%rbp), %rdi   /* arg1 - *lmp */
    movq    16(%rbp), %rsi  /* arg2 - reloc index */
    movq    24(%rbp), %rdx  /* arg3 - pc of caller */
    call    elf_bndr@PLT    /* call elf_bndr(lmp, relndx, pc) */
    movq    %rax, 16(%rbp)  /* store final destination */

    movdqu  0x00(%rsp), %xmm0
    movdqu  0x10(%rsp), %xmm1
    movdqu  0x20(%rsp), %xmm2
    movdqu  0x30(%rsp), %xmm3
    movdqu  0x40(%rsp), %xmm4
    movdqu  0x50(%rsp), %xmm5
    movdqu  0x60(%rsp), %xmm6
    movdqu  0x70(%rsp), %xmm7
    addq    $RTB_XMM_SAVE_SIZE, %rsp    /* release the %xmm save area */

    popq    %r10
    popq    %r9
    popq    %r8
    popq    %rcx
    popq    %rdx
    popq    %rsi
    popq    %rdi
    popq    %rax

    movq    %rbp, %rsp
    popq    %rbp

    addq    $8, %rsp    /* pop 1st plt-pushed args */
                /* the second argument is used */
                /* for the 'return' address to our */
                /* final destination */

    ret         /* invoke resolved function */
    .size   elf_rtbndr, .-elf_rtbndr
#endif