/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2012 Joyent, Inc. All rights reserved.
*/
#if defined(lint)
#include <_rtld.h>
#include <_audit.h>
#include <_elf.h>
#include <sys/auxv_386.h>
/* ARGSUSED0 */
int
{
return (0);
}
#else
#include <link.h>
#include <_audit.h>
#include <sys/asm_linkage.h>
#include <sys/auxv_386.h>
.file "boot_elf.s"
.text
/*
* On entry the 'glue code' has already done the following:
*
* pushq %rbp
* movq %rsp, %rbp
* subq $0x10, %rsp
* leaq trace_fields(%rip), %r11
* movq %r11, -0x8(%rbp)
* movq $elf_plt_trace, %r11
* jmp *%r11
*
* so - -8(%rbp) contains the dyndata ptr
*
* 0x0 Addr *reflmp
* 0x8 Addr *deflmp
* 0x10 Word symndx
* 0x14 Word sb_flags
* 0x18 Sym symdef.st_name
* 0x1c symdef.st_info
* 0x1d symdef.st_other
* 0x1e symdef.st_shndx
* 0x20 symdef.st_value
* 0x28 symdef.st_size
*
* Also note - on entry 16 bytes have already been subtracted
* from the %rsp. The first 8 bytes are for the dyn_data_ptr,
* the second 8 bytes are to align the stack and are available
* for use.
*/
/*
* Local stack space storage for elf_plt_trace is allocated
* as follows:
*
* First - before we got here - %rsp has been decremented
* by 0x10 to make space for the dyndata ptr (and another
* free word). In addition to that, we create space
* for the following:
*
* La_amd64_regs 8 * 8: 64
* prev_stack_size 8 8
* Saved regs:
* %rdi 8
* %rsi 8
* %rdx 8
* %rcx 8
* %r8 8
* %r9 8
* %r10 8
* %r11 8
* %rax 8
* =======
* Subtotal: 144 (+ 16 already subtracted = 160, 32byte aligned)
*
* Saved Media Regs (used to pass floating point args):
* %xmm0 - %xmm7 32 * 8: 256
* =======
* Total: 400 (+ 16 already subtracted = 416, 32byte aligned)
*
* So - will subtract the following to create enough space
*
* -8(%rbp) store dyndata ptr
* -16(%rbp) store call destination
* -80(%rbp) space for La_amd64_regs
* -88(%rbp) prev stack size
* The next %rbp offsets are only true if the caller had correct stack
* alignment. See note above SPRDIOFF for why we use %rsp alignment to
* access these stack fields.
* -96(%rbp) entering %rdi
* -104(%rbp) entering %rsi
* -112(%rbp) entering %rdx
* -120(%rbp) entering %rcx
* -128(%rbp) entering %r8
* -136(%rbp) entering %r9
* -144(%rbp) entering %r10
* -152(%rbp) entering %r11
* -160(%rbp) entering %rax
* -192(%rbp) entering %xmm0
* -224(%rbp) entering %xmm1
* -256(%rbp) entering %xmm2
* -288(%rbp) entering %xmm3
* -320(%rbp) entering %xmm4
* -352(%rbp) entering %xmm5
* -384(%rbp) entering %xmm6
* -416(%rbp) entering %xmm7
*
*/
/*
* The next set of offsets are relative to %rsp.
* We guarantee %rsp is ABI compliant 32-byte aligned. This guarantees the
* ymm registers are saved to 32-byte aligned addresses.
* %rbp may only be 8 byte aligned if we came in from non-ABI compliant code.
*/
#define SPXMM7OFF 0
/* See elf_rtbndr for explanation behind org_scapset */
.extern org_scapset
.align 16
/*
* Enforce ABI 32-byte stack alignment here.
* The next andq instruction does this pseudo code:
* If %rsp is 8 byte aligned then subtract 8 from %rsp.
*/
/*
* save all registers into La_amd64_regs
*/
/*
* prepare for call to la_pltenter
*/
/*
* If *no* la_pltexit() routines exist
* we do not need to keep the stack frame
* before we call the actual routine. Instead we
* jump to it and remove our stack frame from the stack
* at the same time.
*/
/*
* Has the *nopltexit* flag been set for this entry point
*/
/*
* No PLTEXIT processing required.
*/
/
/
/*
* At this point, after a little doctoring, we should
* have the following on the stack:
*
* 16(%rsp): ret addr
* 8(%rsp): dest_addr
* 0(%rsp): Previous %rbp
*
* So - we pop the previous %rbp, and then
* ret to our final destination.
*/
/*
* In order to call the destination procedure and then return
* to audit_pltexit() for post analysis we must first grow
* our stack frame and then duplicate the original caller's
* stack state. This duplicates all of the arguments
* that were to be passed to the destination procedure.
*/
/*
* If audit_argcnt > 0 then we limit the number of
* arguments that will be duplicated to audit_argcnt.
*
* If (prev_stack_size > (audit_argcnt * 8))
* prev_stack_size = audit_argcnt * 8;
*/
/*
* Grow the stack and duplicate the arguments of the
* original caller.
*
* We save %rsp in %r11 since we need to use the current rsp for
* accessing the registers saved in our stack frame.
*/
jmp .while_base / }
/*
* The above stack is now an exact duplicate of
* the stack of the original calling procedure.
*/
/
/
/*
* Call to destination function - we'll return here
* for pltexit monitoring.
*/
/
/
/*
* Clean up after ourselves and return to the
* original calling procedure.
*/
/
/
// rax already contains return value
#endif
/*
* We got here because a call to a function resolved to a procedure
* linkage table entry. That entry did a JMPL to the first PLT entry, which
* in turn did a call to elf_rtbndr.
*
* the code sequence that got us here was:
*
* .PLT0:
* pushq GOT+8(%rip) #GOT[1]
* jmp *GOT+16(%rip) #GOT[2]
* nop
* nop
* nop
* nop
* ...
* PLT entry for foo:
* jmp *name1@GOTPCREL(%rip)
* pushl $rel.plt.foo
* jmp PLT0
*
* At entry, the stack looks like this:
*
* return address 16(%rsp)
* $rel.plt.foo (plt index) 8(%rsp)
* lmp 0(%rsp)
*
*/
#if defined(lint)
void
{
}
#else
/*
* The PLT code that landed us here placed 2 arguments on the stack as
* arguments to elf_rtbndr.
* Additionally the pc of caller is below these 2 args.
* Our stack will look like this after we establish a stack frame with
* push %rbp; movq %rsp, %rbp sequence:
*
* 8(%rbp) arg1 - *lmp
* 16(%rbp), %rsi arg2 - reloc index
* 24(%rbp), %rdx arg3 - pc of caller
*/
/*
* Possible arguments for the resolved function are in registers as per
* the AMD64 ABI. We must save on the local stack all possible register
* arguments before interposing functions to resolve the called function.
* Possible arguments must be restored before invoking the resolved function.
*
* Before the AVX instruction set enhancements to AMD64 there were no changes in
* the set of registers and their sizes across different processors. With AVX,
* the xmm registers became the lower 128 bits of the ymm registers. Because of
* this, we need to conditionally save 256 bits instead of 128 bits. Regardless
* of whether we have ymm registers or not, we're always going to push the stack
* space assuming that we do to simplify the code.
*
* Local stack space storage for elf_rtbndr is allocated as follows:
*
* Saved regs:
* %rax 8
* %rdi 8
* %rsi 8
* %rdx 8
* %rcx 8
* %r8 8
* %r9 8
* %r10 8
* =======
* Subtotal: 64 (32byte aligned)
*
* Saved Media Regs (used to pass floating point args):
* %ymm0 - %ymm7 32 * 8 256
* =======
* Total: 320 (32byte aligned)
*
* So - will subtract the following to create enough space
*
* 0(%rsp) save %rax
* 8(%rsp) save %rdi
* 16(%rsp) save %rsi
* 24(%rsp) save %rdx
* 32(%rsp) save %rcx
* 40(%rsp) save %r8
* 48(%rsp) save %r9
* 56(%rsp) save %r10
* 64(%rsp) save %ymm0
* 96(%rsp) save %ymm1
* 128(%rsp) save %ymm2
* 160(%rsp) save %ymm3
* 192(%rsp) save %ymm4
* 224(%rsp) save %ymm5
* 256(%rsp) save %ymm6
* 288(%rsp) save %ymm7
*
* Note: Some callers may use 8-byte stack alignment instead of the
* ABI-required 16-byte alignment. We use %rsp offsets to save/restore
* registers because %rbp may not be 16-byte aligned. We guarantee %rsp
* is 16-byte aligned in the function preamble.
*/
/*
* As the registers may either be xmm or ymm, we've left the name as xmm, but
* increased the offset between them to always cover the xmm and ymm cases.
*/
/*
* The org_scapset is a global variable that is a part of rtld. It
* contains the capabilities that the kernel has told us are supported
* (auxv_hwcap). This is necessary for determining whether or not we
* need to save and restore AVX registers or simple SSE registers. Note
* that the field we care about is currently at offset 0; if that
* changes, this code will have to be updated.
*/
.extern org_scapset
/*
* Some libraries may (incorrectly) use non-ABI compliant 8-byte stack
* alignment. Enforce ABI 16-byte stack alignment here.
* The next andq instruction does this pseudo code:
* If %rsp is 8 byte aligned then subtract 8 from %rsp.
*/
/*
* Our xmm registers could secretly be ymm registers in disguise.
*/
.save_xmm:
.save_ymm:
/*
* Restore possible arguments before invoking resolved function. We
* check the xmm vs. ymm regs first so we can use the others.
*/
/* the second argument is used */
/* for the 'return' address to our */
/* final destination */
ret /* invoke resolved function */
#endif