/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "assembler_x86.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_x86.hpp"
#include "oops/instanceOop.hpp"
#include "oops/methodOop.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#ifdef TARGET_OS_FAMILY_linux
# include "thread_linux.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_solaris
# include "thread_solaris.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_windows
# include "thread_windows.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_bsd
# include "thread_bsd.inline.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif
// Stub Code definitions
// pc is the instruction which we must emulate
// doing a no-op is fine: return garbage from the load
// therefore, compute npc
// request an async exception
// return address of next instruction to execute
return npc;
}
private:
#ifdef PRODUCT
#else
void inc_counter_np_(int& counter) {
  // This can destroy rscratch1 if counter is far from the code cache
  __ incrementl(ExternalAddress((address)&counter));
}
#define inc_counter_np(counter) \
  BLOCK_COMMENT("inc_counter " #counter); \
  inc_counter_np_(counter);
#endif
// Call stubs are used to call Java from C
//
// Linux Arguments:
// c_rarg0: call wrapper address address
// c_rarg1: result address
// c_rarg2: result type BasicType
// c_rarg3: method methodOop
// c_rarg4: (interpreter) entry point address
// c_rarg5: parameters intptr_t*
// 16(rbp): parameter size (in words) int
// 24(rbp): thread Thread*
//
// [ return_from_Java ] <--- rsp
// [ argument word n ]
// ...
// -12 [ argument word 1 ]
// -11 [ saved r15 ] <--- rsp_after_call
// -10 [ saved r14 ]
// -9 [ saved r13 ]
// -8 [ saved r12 ]
// -7 [ saved rbx ]
// -6 [ call wrapper ]
// -5 [ result ]
// -4 [ result type ]
// -3 [ method ]
// -2 [ entry point ]
// -1 [ parameters ]
// 0 [ saved rbp ] <--- rbp
// 1 [ return address ]
// 2 [ parameter size ]
// 3 [ thread ]
//
// Windows Arguments:
// c_rarg0: call wrapper address address
// c_rarg1: result address
// c_rarg2: result type BasicType
// c_rarg3: method methodOop
// 48(rbp): (interpreter) entry point address
// 56(rbp): parameters intptr_t*
// 64(rbp): parameter size (in words) int
// 72(rbp): thread Thread*
//
// [ return_from_Java ] <--- rsp
// [ argument word n ]
// ...
// -28 [ argument word 1 ]
// -27 [ saved xmm15 ] <--- rsp_after_call
// [ saved xmm7-xmm14 ]
// -9 [ saved xmm6 ] (each xmm register takes 2 slots)
// -7 [ saved r15 ]
// -6 [ saved r14 ]
// -5 [ saved r13 ]
// -4 [ saved r12 ]
// -3 [ saved rdi ]
// -2 [ saved rsi ]
// -1 [ saved rbx ]
// 0 [ saved rbp ] <--- rbp
// 1 [ return address ]
// 2 [ call wrapper ]
// 3 [ result ]
// 4 [ result type ]
// 5 [ method ]
// 6 [ entry point ]
// 7 [ parameters ]
// 8 [ parameter size ]
// 9 [ thread ]
//
// Windows reserves the caller's stack space for arguments 1-4.
// We spill c_rarg0-c_rarg3 to this space.
// Call stub stack layout word offsets from rbp
enum call_stub_layout {
#ifdef _WIN64
rbp_off = 0,
#else
rsp_after_call_off = -12,
r15_off = -11,
r14_off = -10,
r13_off = -9,
r12_off = -8,
rbx_off = -7,
call_wrapper_off = -6,
result_off = -5,
result_type_off = -4,
method_off = -3,
entry_point_off = -2,
parameters_off = -1,
rbp_off = 0,
retaddr_off = 1,
parameter_size_off = 2,
thread_off = 3
#endif
};
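// For reference, the C++ side enters this stub through a function pointer whose
// shape matches the layout above (see the CallStub typedef in stubRoutines.hpp);
// sketch only, not the authoritative declaration:
//
//   typedef void (*CallStub)(address        link,            // call wrapper
//                            intptr_t*      result,
//                            BasicType      result_type,
//                            methodOopDesc* method,
//                            address        entry_point,
//                            intptr_t*      parameters,
//                            int            size_of_parameters,
//                            TRAPS);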
#ifdef _WIN64
}
#endif
"adjust this code");
// same as in generate_catch_exception()!
// same as in generate_catch_exception()!
// stub code
// save register parameters
#ifndef _WIN64
#endif
// save regs belonging to calling function
#ifdef _WIN64
for (int i = 6; i <= 15; i++) {
}
#else
{
}
#endif
// Load up thread register
#ifdef ASSERT
// make sure we have no pending exceptions
{
Label L;
}
#endif
// pass parameters if any
BLOCK_COMMENT("pass parameters if any");
// call Java function
BLOCK_COMMENT("call Java function");
BLOCK_COMMENT("call_stub_return_address:");
// store result depending on type (everything that is not
// T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
// handle T_INT case
// pop parameters
#ifdef ASSERT
// verify that threads correspond
{
Label L, S;
}
#endif
// restore regs belonging to calling function
#ifdef _WIN64
for (int i = 15; i >= 6; i--) {
}
#endif
#ifdef _WIN64
#else
#endif
// restore rsp
// return
// handle return types different from T_INT
return start;
}
// Return point for a Java call if there's an exception thrown in
// Java code. The exception is caught and transformed into a
// pending exception stored in JavaThread that can be tested from
// within the VM.
//
// Note: Usually the parameters are removed by the callee. In case
// of an exception crossing an activation frame boundary, that is
// not the case if the callee is compiled code => need to setup the
// rsp.
//
// rax: exception oop
// same as in generate_call_stub():
#ifdef ASSERT
// verify that threads correspond
{
Label L, S;
}
#endif
// set pending exception
// complete return to VM
"_call_stub_return_address must have been generated before");
return start;
}
// Continuation point for runtime calls returning with a pending
// exception. The pending exception check happened in the runtime
// or native call stub. The pending exception in Thread is
// converted into a Java-level exception.
//
// Contract with Java-level exception handlers:
// rax: exception
// rdx: throwing pc
//
// NOTE: At entry of this stub, exception-pc must be on stack !!
// Upon entry, the sp points to the return address returning into
// Java (interpreted or compiled) code; i.e., the return address
// becomes the throwing pc.
//
// Arguments pushed before the runtime call are still on the stack
// but the exception handler will reset the stack pointer ->
// ignore them. A potential result in registers can be ignored as
// well.
#ifdef ASSERT
// make sure this code is only executed if there is a pending exception
{
Label L;
}
#endif
// compute exception handler into rbx
BLOCK_COMMENT("call exception_handler_for_return_address");
// setup rax & rdx, remove return address & clear pending exception
#ifdef ASSERT
// make sure exception is set
{
Label L;
}
#endif
// continue at exception handler (return address removed)
// rax: exception
// rbx: exception handler
// rdx: throwing pc
return start;
}
// Support for jint atomic::xchg(jint exchange_value, volatile jint* dest)
//
// Arguments :
// c_rarg0: exchange_value
// c_rarg1: dest
//
// Result:
// *dest <- ex, return (orig *dest)
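    // A minimal sketch of the emitted code, assuming the MacroAssembler calls
    // used throughout this file (illustration, not necessarily the exact
    // shipped sequence):
    __ movl(rax, c_rarg0);               // exchange_value into the return register
    __ xchgl(rax, Address(c_rarg1, 0));  // swap with *dest; xchg with memory is implicitly locked
    __ ret(0);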
return start;
}
// Support for intptr_t atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
//
// Arguments :
// c_rarg0: exchange_value
// c_rarg1: dest
//
// Result:
// *dest <- ex, return (orig *dest)
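    // Pointer-sized variant of the sketch above (hedged reconstruction):
    __ movptr(rax, c_rarg0);               // exchange_value
    __ xchgptr(rax, Address(c_rarg1, 0));  // implicitly locked swap with *dest
    __ ret(0);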
return start;
}
// Support for jint atomic::atomic_cmpxchg(jint exchange_value, volatile jint* dest,
// jint compare_value)
//
// Arguments :
// c_rarg0: exchange_value
// c_rarg1: dest
// c_rarg2: compare_value
//
// Result:
// if ( compare_value == *dest ) {
// *dest = exchange_value
// return compare_value;
// } else
// return *dest;
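    // Plausible emitted sequence: cmpxchg wants the compare value in rax, which
    // conveniently doubles as the return value (hedged sketch):
    __ movl(rax, c_rarg2);                      // compare_value -> rax
    if (os::is_MP()) __ lock();
    __ cmpxchgl(c_rarg0, Address(c_rarg1, 0));  // if (*dest == rax) *dest = exchange_value, else rax = *dest
    __ ret(0);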
return start;
}
// Support for jint atomic::atomic_cmpxchg_long(jlong exchange_value,
// volatile jlong* dest,
// jlong compare_value)
// Arguments :
// c_rarg0: exchange_value
// c_rarg1: dest
// c_rarg2: compare_value
//
// Result:
// if ( compare_value == *dest ) {
// *dest = exchange_value
// return compare_value;
// } else
// return *dest;
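    // Same idea as the 32-bit version, using quadword operations (sketch):
    __ movq(rax, c_rarg2);                      // compare_value -> rax
    if (os::is_MP()) __ lock();
    __ cmpxchgq(c_rarg0, Address(c_rarg1, 0));
    __ ret(0);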
return start;
}
// Support for jint atomic::add(jint add_value, volatile jint* dest)
//
// Arguments :
// c_rarg0: add_value
// c_rarg1: dest
//
// Result:
// *dest += add_value
// return *dest;
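    // A hedged sketch using xadd, which yields the old value, so the new value
    // is recomputed for the caller:
    __ movl(rax, c_rarg0);                   // add_value
    if (os::is_MP()) __ lock();
    __ xaddl(Address(c_rarg1, 0), c_rarg0);  // *dest += add_value; c_rarg0 = old *dest
    __ addl(rax, c_rarg0);                   // old *dest + add_value == new *dest
    __ ret(0);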
return start;
}
// Support for intptr_t atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest)
//
// Arguments :
// c_rarg0: add_value
// c_rarg1: dest
//
// Result:
// *dest += add_value
// return *dest;
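    // Pointer-sized counterpart of the add sketch above (assumed, not verified
    // against the shipped code):
    __ movptr(rax, c_rarg0);                   // add_value
    if (os::is_MP()) __ lock();
    __ xaddptr(Address(c_rarg1, 0), c_rarg0);  // *dest += add_value; c_rarg0 = old *dest
    __ addptr(rax, c_rarg0);                   // new *dest
    __ ret(0);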
return start;
}
// Support for intptr_t OrderAccess::fence()
//
// Arguments :
//
// Result:
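    // The stub only needs a StoreLoad barrier; a minimal sketch:
    __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
    __ ret(0);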
return start;
}
// Support for intptr_t get_previous_fp()
//
// This routine is used to find the previous frame pointer for the
// caller (current_frame_guess). This is used as part of debugging, when
// ps() is seemingly lost trying to find frames.
// This code assumes that the caller (current_frame_guess) has a frame.
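    // A minimal sketch of the frame walk described above (reconstruction):
    __ enter();                       // chain a frame so rbp is well defined
    __ movptr(rax, Address(rbp, 0));  // caller's saved rbp == its frame pointer
    __ movptr(rax, Address(rax, 0));  // one more level up, the frame ps() wants
    __ leave();
    __ ret(0);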
return start;
}
// Support for intptr_t get_previous_sp()
//
// This routine is used to find the previous stack pointer for the
// caller.
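    // Sketch: the caller's sp is this stub's sp with the return address popped
    // off (hedged reconstruction):
    __ movptr(rax, rsp);
    __ addptr(rax, 8);   // skip the return address pushed by the call
    __ ret(0);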
return start;
}
//----------------------------------------------------------------------------------------------------
// Support for void verify_mxcsr()
//
// This routine is used with -Xcheck:jni to verify that native
// JNI code does not return to Java code without restoring the
// MXCSR register to our expected state.
if (CheckJNICalls) {
}
return start;
}
Label L;
return start;
}
Label L;
return start;
}
Label L;
return start;
}
Label L;
return start;
}
return start;
}
// The following routine generates a subroutine to throw an
// asynchronous UnknownError when an unsafe access gets a fault that
// could not be reasonably prevented by the programmer. (Example:
// SIGBUS/OBJERR.)
// FIXME: this probably needs alignment logic
BLOCK_COMMENT("call handle_unsafe_access");
return start;
}
// Non-destructive plausibility checks for oops
//
// Arguments:
// all args on stack!
//
// Stack after saving c_rarg3:
// [tos + 0]: saved c_rarg3
// [tos + 1]: saved c_rarg2
// [tos + 2]: saved r12 (several TemplateTable methods use it)
// [tos + 3]: saved flags
// [tos + 4]: return address
// * [tos + 5]: error message (char*)
// * [tos + 6]: object to verify (oop)
// * [tos + 7]: saved rax - saved by caller and bashed
// * [tos + 8]: saved r10 (rscratch1) - saved by caller
// * = popped on exit
// save c_rarg2 and c_rarg3
enum {
// After previous pushes.
// Before the call to MacroAssembler::debug(), see below.
};
// get object
// make sure object is 'reasonable'
// Check if the oop is in the right area of memory
// set r12 to heapbase for load_klass()
// make sure klass is 'reasonable'
// Check if the klass is in the right area of memory
// make sure klass' klass is 'reasonable'
// Check if the klass' klass is in the right area of memory
// return if everything seems ok
// handle errors
// will be ignored
// (rip is already
// already pushed)
// debug(char* msg, int64_t pc, int64_t regs[])
// We've popped the registers we'd saved (c_rarg3, c_rarg2 and flags), and
// pushed all the registers, so now the stack looks like:
// [tos + 0] 16 saved registers
// [tos + 16] return address
// * [tos + 17] error message (char*)
// * [tos + 18] object to verify (oop)
// * [tos + 19] saved rax - saved by caller and bashed
// * [tos + 20] saved r10 (rscratch1) - saved by caller
// * = popped on exit
BLOCK_COMMENT("call MacroAssembler::debug");
return start;
}
//
// Verify that a register contains a clean 32-bit positive value
// (high 32 bits are 0) so it can be used in 64-bit shifts.
//
// Input:
// Rint - 32-bits value
// Rtmp - scratch
//
#ifdef ASSERT
Label L;
#endif
}
// Generate overlap test for array copy stubs
//
// Input:
// c_rarg0 - from
// c_rarg1 - to
// c_rarg2 - element count
//
// Output:
// rax - &from[element count - 1]
//
}
}
} else {
}
}
//
// Outputs:
// rdi - rcx
// rsi - rdx
// rdx - r8
// rcx - r9
//
// Registers r9 and r10 are used to save rdi and rsi on Windows, where the
// latter are non-volatile; hence r9 and r10 must not be used by the caller.
//
#ifdef _WIN64
"unexpected argument registers");
if (nargs >= 4)
if (nargs >= 4)
#else
"unexpected argument registers");
#endif
}
void restore_arg_regs() {
#ifdef _WIN64
#endif
}
// Generate code for an array write pre barrier
//
// addr - starting address
// count - element count
// tmp - scratch register
//
// Destroy no registers!
//
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
// With G1, don't generate the call if we statically know that the target is uninitialized
if (!dest_uninitialized) {
// exactly backwards!!
} else {
}
} else {
}
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
case BarrierSet::ModRef:
break;
default:
}
}
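  // In the G1 cases above, the stub simply hands the whole destination region
  // to the runtime pre-barrier, roughly (hedged reconstruction):
  //   __ pusha();   // keep the "destroy no registers" promise
  //   __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
  //                   addr, count);
  //   __ popa();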
//
// Generate code for an array write post barrier
//
// Input:
// start - register containing starting address of destination array
// count - elements count
// scratch - scratch register
//
// The input registers are overwritten.
//
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
} else {
}
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
}
break;
default:
}
}
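  // For the card table cases above, the generated code dirties every card
  // covered by [start, start + count*heapOopSize), essentially:
  //
  //   for (card = start >> CardTableModRefBS::card_shift;
  //        card <= end >> CardTableModRefBS::card_shift; card++)
  //     byte_map_base[card] = dirty_card_val();
  //
  // (hedged description; the emitted loop walks the cards backwards with
  // movb/decrement/jcc.)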
// Copy big chunks forward
//
// Inputs:
// end_from - source arrays end address
// end_to - destination array end address
// qword_count - 64-bit element count, negative
// to - scratch
// L_copy_bytes - entry label
// L_copy_8_bytes - exit label
//
if (UseUnalignedLoadStores) {
// Copy 64 bytes per iteration
if (UseAVX >= 2) {
} else {
}
// Copy trailing 32 bytes
if (UseAVX >= 2) {
} else {
}
if (UseAVX >= 2) {
// clean upper bits of YMM registers
__ vzeroupper();
}
} else {
// Copy 32 bytes per iteration
}
}
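  // One 64-byte iteration of the UseAVX >= 2 path above is essentially two
  // 32-byte vmovdqu load/store pairs followed by advancing qword_count by 8
  // (hedged reconstruction; displacements illustrative):
  //   __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
  //   __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
  //   __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
  //   __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
  //   __ addptr(qword_count, 8);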
// Copy big chunks backward
//
// Inputs:
// from - source arrays address
// dest - destination array address
// qword_count - 64-bit element count
// to - scratch
// L_copy_bytes - entry label
// L_copy_8_bytes - exit label
//
if (UseUnalignedLoadStores) {
// Copy 64 bytes per iteration
if (UseAVX >= 2) {
} else {
}
// Copy trailing 32 bytes
if (UseAVX >= 2) {
} else {
}
if (UseAVX >= 2) {
// clean upper bits of YMM registers
__ vzeroupper();
}
} else {
// Copy 32 bytes per iteration
}
}
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
// ignored
// name - stub name string
//
// Inputs:
// c_rarg0 - source array address
// c_rarg1 - destination array address
// c_rarg2 - element count, treated as ssize_t, can be zero
//
// If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, we
// let the hardware handle it. The one to eight bytes within words,
// dwords or qwords that span cache line boundaries will still be loaded
// and stored atomically.
//
// Side Effects:
// disjoint_byte_copy_entry is set to the no-overlap entry point
// used by generate_conjoint_byte_copy().
//
// End pointers are inclusive, and if count is not zero they point
// to the last unit copied: end_to[0] := end_from[0]
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
// 'from', 'to' and 'count' are now valid
// Copy from low to high addresses. Use 'to' as scratch.
// Copy trailing qwords
// Check for and copy trailing dword
// Check for and copy trailing word
// Check for and copy trailing byte
// Copy in multi-byte chunks
return start;
}
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
// ignored
// name - stub name string
//
// Inputs:
// c_rarg0 - source array address
// c_rarg1 - destination array address
// c_rarg2 - element count, treated as ssize_t, can be zero
//
// If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, we
// let the hardware handle it. The one to eight bytes within words,
// dwords or qwords that span cache line boundaries will still be loaded
// and stored atomically.
//
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
// 'from', 'to' and 'count' are now valid
// Copy from high to low addresses.
// Check for and copy trailing byte
// Check for and copy trailing word
// Check for and copy trailing dword
// Copy trailing qwords
// Copy in multi-byte chunks
return start;
}
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
// ignored
// name - stub name string
//
// Inputs:
// c_rarg0 - source array address
// c_rarg1 - destination array address
// c_rarg2 - element count, treated as ssize_t, can be zero
//
// If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
// let the hardware handle it. The two or four words within dwords
// or qwords that span cache line boundaries will still be loaded
// and stored atomically.
//
// Side Effects:
// disjoint_short_copy_entry is set to the no-overlap entry point
// used by generate_conjoint_short_copy().
//
// End pointers are inclusive, and if count is not zero they point
// to the last unit copied: end_to[0] := end_from[0]
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
// 'from', 'to' and 'count' are now valid
// Copy from low to high addresses. Use 'to' as scratch.
// Copy trailing qwords
// Original 'dest' is trashed, so we can't use it as a
// base register for a possible trailing word copy
// Check for and copy trailing dword
// Check for and copy trailing word
// Copy in multi-byte chunks
return start;
}
BLOCK_COMMENT("Entry:");
return start;
}
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
// ignored
// name - stub name string
//
// Inputs:
// c_rarg0 - source array address
// c_rarg1 - destination array address
// c_rarg2 - element count, treated as ssize_t, can be zero
//
// If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
// let the hardware handle it. The two or four words within dwords
// or qwords that span cache line boundaries will still be loaded
// and stored atomically.
//
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
// 'from', 'to' and 'count' are now valid
// Copy from high to low addresses. Use 'to' as scratch.
// Check for and copy trailing word
// Check for and copy trailing dword
// Copy trailing qwords
// Copy in multi-byte chunks
return start;
}
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
// ignored
// is_oop - true => oop array, so generate store check code
// name - stub name string
//
// Inputs:
// c_rarg0 - source array address
// c_rarg1 - destination array address
// c_rarg2 - element count, treated as ssize_t, can be zero
//
// If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
// the hardware handle it. The two dwords within qwords that span
// cache line boundaries will still be loaded and stored atomically.
//
// Side Effects:
// disjoint_int_copy_entry is set to the no-overlap entry point
// used by generate_conjoint_int_oop_copy().
//
const char *name, bool dest_uninitialized = false) {
// End pointers are inclusive, and if count is not zero they point
// to the last unit copied: end_to[0] := end_from[0]
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
if (is_oop) {
}
// 'from', 'to' and 'count' are now valid
// Copy from low to high addresses. Use 'to' as scratch.
// Copy trailing qwords
// Check for and copy trailing dword
if (is_oop) {
}
// Copy in multi-byte chunks
return start;
}
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
// ignored
// is_oop - true => oop array, so generate store check code
// name - stub name string
//
// Inputs:
// c_rarg0 - source array address
// c_rarg1 - destination array address
// c_rarg2 - element count, treated as ssize_t, can be zero
//
// If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
// the hardware handle it. The two dwords within qwords that span
// cache line boundaries will still be loaded and stored atomically.
//
bool dest_uninitialized = false) {
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
if (is_oop) {
// no registers are destroyed by this call
}
// 'from', 'to' and 'count' are now valid
// Copy from high to low addresses. Use 'to' as scratch.
// Check for and copy trailing dword
// Copy trailing qwords
if (is_oop) {
}
// Copy in multi-byte chunks
if (is_oop) {
}
return start;
}
// Arguments:
// aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
// ignored
// is_oop - true => oop array, so generate store check code
// name - stub name string
//
// Inputs:
// c_rarg0 - source array address
// c_rarg1 - destination array address
// c_rarg2 - element count, treated as ssize_t, can be zero
//
// Side Effects:
// disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
// no-overlap entry point used by generate_conjoint_long_oop_copy().
//
const char *name, bool dest_uninitialized = false) {
// End pointers are inclusive, and if count is not zero they point
// to the last unit copied: end_to[0] := end_from[0]
// Save no-overlap entry point for generate_conjoint_long_oop_copy()
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
// 'from', 'to' and 'qword_count' are now valid
if (is_oop) {
// Save to and count for store barrier
// no registers are destroyed by this call
}
// Copy from low to high addresses. Use 'to' as scratch.
// Copy trailing qwords
if (is_oop) {
} else {
}
// Copy in multi-byte chunks
if (is_oop) {
}
if (is_oop) {
} else {
}
return start;
}
// Arguments:
// aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
// ignored
// is_oop - true => oop array, so generate store check code
// name - stub name string
//
// Inputs:
// c_rarg0 - source array address
// c_rarg1 - destination array address
// c_rarg2 - element count, treated as ssize_t, can be zero
//
const char *name, bool dest_uninitialized = false) {
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
// r9 and r10 may be used to save non-volatile registers
// 'from', 'to' and 'qword_count' are now valid
if (is_oop) {
// Save to and count for store barrier
// No registers are destroyed by this call
}
// Copy trailing qwords
if (is_oop) {
} else {
}
// Copy in multi-byte chunks
if (is_oop) {
}
if (is_oop) {
} else {
}
return start;
}
// Helper for generating a dynamic type check.
// Smashes no registers.
BLOCK_COMMENT("type_check:");
// Fall through on failure!
}
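  // The actual test is delegated to the MacroAssembler subtype-check helpers,
  // along these lines (register choices assumed for illustration only):
  //   __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg,
  //                                    &L_success, &L_miss, NULL, super_check_offset);
  //   __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg,
  //                                    &L_success, NULL);
  //   __ bind(L_miss);   // fall through on failure, as noted above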
//
// Generate checkcasting array copy stub
//
// Input:
// c_rarg0 - source array address
// c_rarg1 - destination array address
// c_rarg2 - element count, treated as ssize_t, can be zero
// c_rarg3 - size_t ckoff (super_check_offset)
// not Win64
// c_rarg4 - oop ckval (super_klass)
// Win64
// rsp+40 - oop ckval (super_klass)
//
// Output:
// rax == 0 - success
// rax == -1^K - failure, where K is partial transfer count
//
bool dest_uninitialized = false) {
// Input registers (after setup_arg_regs)
// Registers used as temps (r13, r14 are save-on-entry)
// End pointers are inclusive, and if length is not zero they point
// to the last unit copied: end_to[0] := end_from[0]
//---------------------------------------------------------------
// Assembler stub will be used for this call to arraycopy
// if the two arrays are subtypes of Object[] but the
// destination array type is not equal to or a supertype
// of the source type. Each element must be separately
// checked.
#ifdef ASSERT
// caller guarantees that the arrays really are different
// otherwise, we would have to make conjoint checks
{ Label L;
}
#endif //ASSERT
// ckoff => rcx, ckval => r8
// r9 and r10 may be used to save non-volatile registers
#ifdef _WIN64
// last argument (#4) is on stack on Win64
#endif
// Caller of this entry point must set up the argument registers.
BLOCK_COMMENT("Entry:");
}
// allocate spill slots for r13, r14
enum {
};
// check that int operands are properly extended to size_t
#ifdef ASSERT
BLOCK_COMMENT("assert consistent ckoff/ckval");
// The ckoff and ckval must be mutually consistent,
// even though caller generates both.
{ Label L;
}
#endif //ASSERT
// Loop-invariant addresses. They are exclusive end pointers.
// Loop-variant addresses. They assume post-incremented count < 0.
// Copy from low to high addresses, indexed from the end of each array.
// Empty array: Nothing to do.
// ======== begin loop ========
// (Loop is rotated; its entry is L_load_element.)
// Loop control:
// for (count = -count; count != 0; count++)
// Base pointers src, dst are biased by 8*(count-1),to last element.
// ======== loop entry is here ========
// ======== end loop ========
// It was a real error; we must depend on the caller to finish the job.
// Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
// Emit GC store barriers for the oops we have copied (r14 + rdx),
// and report their number to the caller.
// Come here on success only.
// Common exit point (success or failure).
return start;
}
//
// Generate 'unsafe' array copy stub
// Though just as safe as the other stubs, it takes an unscaled
// size_t argument instead of an element count.
//
// Input:
// c_rarg0 - source array address
// c_rarg1 - destination array address
// c_rarg2 - byte count, treated as ssize_t, can be zero
//
// Examines the alignment of the operands and dispatches
// to a long, int, short, or byte copy loop.
//
// Input registers (before setup_arg_regs)
// Register used as a temp
// bump this on entry, not on exit:
return start;
}
// Perform range checks on the proposed arraycopy.
// Kills temp, but nothing else.
// Also, clean the sign bits of src_pos and dst_pos.
BLOCK_COMMENT("arraycopy_range_checks:");
// if (src_pos + length > arrayOop(src)->length()) FAIL;
// if (dst_pos + length > arrayOop(dst)->length()) FAIL;
// Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
// Move with sign extension can be used since they are positive.
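    // A hedged sketch of one bounds check plus the sign cleanup; the names
    // src, src_pos, dst_pos, length, temp and L_failed are assumed from this
    // helper's parameters, not shown above:
    __ movl(temp, length);
    __ addl(temp, src_pos);      // src_pos + length, 32-bit arithmetic
    __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes()));
    __ jcc(Assembler::above, L_failed);
    __ movslq(src_pos, src_pos); // values are known non-negative, so sign
    __ movslq(dst_pos, dst_pos); // extension also clears the high 32 bits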
BLOCK_COMMENT("arraycopy_range_checks done");
}
//
// Generate generic array copy stubs
//
// Input:
// c_rarg0 - src oop
// c_rarg1 - src_pos (32-bits)
// c_rarg2 - dst oop
// c_rarg3 - dst_pos (32-bits)
// not Win64
// c_rarg4 - element count (32-bits)
// Win64
// rsp+40 - element count (32-bits)
//
// Output:
// rax == 0 - success
// rax == -1^K - failure, where K is partial transfer count
//
// Input registers
#ifndef _WIN64
#else
#endif
}
// Short-hop target to L_failed. Makes for denser prologue code.
// bump this on entry, not on exit:
//-----------------------------------------------------------------------
// Assembler stub will be used for this call to arraycopy
// if the following conditions are met:
//
// (1) src and dst must not be null.
// (2) src_pos must not be negative.
// (3) dst_pos must not be negative.
// (4) length must not be negative.
// (5) src klass and dst klass should be the same and not NULL.
// (6) src and dst should be arrays.
// (7) src_pos + length must not exceed length of src.
// (8) dst_pos + length must not exceed length of dst.
//
// if (src == NULL) return -1;
// if (src_pos < 0) return -1;
// if (dst == NULL) return -1;
// if (dst_pos < 0) return -1;
// The first four tests are very dense code,
// but not quite dense enough to put four
// jumps in a 16-byte instruction fetch buffer.
// That's good, because some branch predictors
// do not like jumps so close together.
// Make sure of this.
// registers used as temp
// if (length < 0) return -1;
#ifdef ASSERT
// assert(src->klass() != NULL);
{
BLOCK_COMMENT("assert klasses not null {");
BLOCK_COMMENT("} assert klasses not null done");
}
#endif
// Load layout helper (32-bits)
//
//  |array_tag|     | header_size | element_type |     |log2_element_size|
// 32        30    24            16              8     2                 0
//
// array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
//
// Handle objArrays completely differently...
// if (src->klass() != dst->klass()) return -1;
// if (!src->is_Array()) return -1;
// At this point, it is known to be a typeArray (array_tag 0x3).
#ifdef ASSERT
{
BLOCK_COMMENT("assert primitive array {");
Label L;
BLOCK_COMMENT("} assert primitive array done");
}
#endif
// typeArrayKlass
//
// src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
// dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
//
BLOCK_COMMENT("choose copy loop based on element size");
// next registers should be set before the jump to corresponding stub
// 'from', 'to', 'count' registers should be set in such order
// since they are the same as 'src', 'src_pos', 'dst'.
#ifdef ASSERT
{
BLOCK_COMMENT("assert long copy {");
Label L;
BLOCK_COMMENT("} assert long copy done");
}
#endif
// objArrayKlass
// live at this point: r10_src_klass, r11_length, src[_pos], dst[_pos]
// test array classes for subtyping
// Identically typed arrays can be copied without element-wise checks.
// live at this point: r10_src_klass, r11_length, rax (dst_klass)
{
// Before looking at dst.length, make sure dst is also an objArray.
// It is safe to examine both src.length and dst.length.
// Marshal the base address arguments now, freeing registers.
// Generate the type check.
// Fetch destination element klass from the objArrayKlass header.
// the checkcast_copy loop needs two extra arguments:
// Set up arguments for checkcast_copy_entry.
setup_arg_regs(4);
}
return start;
}
void generate_arraycopy_stubs() {
"jbyte_disjoint_arraycopy");
"jbyte_arraycopy");
"jshort_disjoint_arraycopy");
StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
"jshort_arraycopy");
"jint_disjoint_arraycopy");
&entry_jint_arraycopy, "jint_arraycopy");
"jlong_disjoint_arraycopy");
&entry_jlong_arraycopy, "jlong_arraycopy");
if (UseCompressedOops) {
"oop_disjoint_arraycopy");
&entry_oop_arraycopy, "oop_arraycopy");
"oop_disjoint_arraycopy_uninit",
/*dest_uninitialized*/true);
NULL, "oop_arraycopy_uninit",
/*dest_uninitialized*/true);
} else {
"oop_disjoint_arraycopy");
&entry_oop_arraycopy, "oop_arraycopy");
"oop_disjoint_arraycopy_uninit",
/*dest_uninitialized*/true);
NULL, "oop_arraycopy_uninit",
/*dest_uninitialized*/true);
}
StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
/*dest_uninitialized*/true);
// We don't generate specialized code for HeapWord-aligned source
// arrays, so just use the code we've already generated
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
}
void generate_math_stubs() {
{
}
{
}
{
}
{
}
{
}
{
__ exp_with_fallback(0);
}
{
__ pow_with_fallback(0);
}
}
// AES intrinsic stubs
return start;
}
// Utility routine for loading a 128-bit key word in little endian format
// can optionally specify that the shuffle mask is already in an xmm register
__ movdqu(xmmdst, Address(key, offset));
if (xmm_shuf_mask != NULL) {
  __ pshufb(xmmdst, xmm_shuf_mask);
} else {
  __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
}
}
// Arguments:
//
// Inputs:
// c_rarg0 - source byte array address
// c_rarg1 - destination byte array address
// c_rarg2 - K (key) in little endian int array
//
// On win64 xmm6-xmm15 must be preserved so don't use them.
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// For encryption, the java expanded key ordering is just what we need
// we don't know if the key is aligned, hence not using load-execute form
return start;
}
// Arguments:
//
// Inputs:
// c_rarg0 - source byte array address
// c_rarg1 - destination byte array address
// c_rarg2 - K (key) in little endian int array
//
// On win64 xmm6-xmm15 must be preserved so don't use them.
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// for decryption java expanded key ordering is rotated one position from what we want
// so we start from 0x10 here and hit 0x00 last
// we don't know if the key is aligned, hence not using load-execute form
// for decryption the aesdeclast operation is always on key+0x00
return start;
}
// Arguments:
//
// Inputs:
// c_rarg0 - source byte array address
// c_rarg1 - destination byte array address
// c_rarg2 - K (key) in little endian int array
// c_rarg3 - r vector byte array address
// c_rarg4 - input length
//
// and left with the results of the last encryption block
#ifndef _WIN64
#else
#endif
// xmm register assignments for the loops below
// keys 0-10 preloaded into xmm2-xmm12
#ifdef _WIN64
// on win64, fill len_reg from stack position
// save the xmm registers which must be preserved 6-15
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
}
#endif
// load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0
offset += 0x10;
}
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// 128 bit code follows here
}
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
__ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
#ifdef _WIN64
// restore xmm regs belonging to calling function
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
}
#endif
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
// 192-bit code follows here (could be changed to use more xmm registers)
}
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
// 256-bit code follows here (could be changed to use more xmm registers)
}
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
return start;
}
// This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time
// to hide instruction latency
//
// Arguments:
//
// Inputs:
// c_rarg0 - source byte array address
// c_rarg1 - destination byte array address
// c_rarg2 - K (key) in little endian int array
// c_rarg3 - r vector byte array address
// c_rarg4 - input length
//
// and left with the results of the last encryption block
#ifndef _WIN64
#else
#endif
// keys 0-10 preloaded into xmm2-xmm12
#ifdef _WIN64
// on win64, fill len_reg from stack position
// save the xmm registers which must be preserved 6-15
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
}
#endif
// the java expanded key ordering is rotated one position from what we want
// so we start from 0x10 here and hit 0x00 last
// load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00
offset += 0x10;
}
// registers holding the four results in the parallelized loop
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// 128-bit code follows here, parallelized
__ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0*AESBlockSize)); // get next 4 blocks into xmmresult registers
}
// for each result, xor with the r vector of previous cipher block
__ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3*AESBlockSize)); // this will carry over to next set of blocks
__ movdqu(Address(to, pos, Address::times_1, 0*AESBlockSize), xmm_result0); // store 4 results into the next 64 bytes of output
// registers used in the non-parallelized loops
// xmm register assignments for the loops below
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
}
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
__ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
__ movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object
#ifdef _WIN64
// restore regs belonging to calling function
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
}
#endif
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
// 192-bit code follows here (could be optimized to use parallelism)
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
}
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
__ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
// 256-bit code follows here (could be optimized to use parallelism)
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
}
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
__ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
return start;
}
// Continuation point for throwing of implicit exceptions that are
// not handled in the current activation. Fabricates an exception
// oop and initiates normal exception dispatching in this
// frame. Since we need to preserve callee-saved values (currently
// only for C2, but done for C1 as well) we need a callee-saved oop
// map and therefore have to make these stubs into RuntimeStubs
// rather than BufferBlobs. If the compiler needs all registers to
// be preserved between the fault point and the exception handler
// then it must assume responsibility for that in
// AbstractCompiler::continuation_for_implicit_null_exception or
// continuation_for_implicit_division_by_zero_exception. All other
// implicit exceptions (e.g., NullPointerException or
// AbstractMethodError on entry) are either at call sites or
// otherwise assume that stack unwinding will be initiated, so
// caller saved registers were assumed volatile in the compiler.
// Information about frame layout at time of blocking runtime call.
// Note that we only have to preserve callee-saved registers since
// the compilers are responsible for supplying a continuation point
// if they expect all registers to be preserved.
enum layout {
};
// This is an inlined and slightly modified version of call_VM
// which has the ability to fetch the return PC out of
// thread-local storage and also sets up last_Java_sp slightly
// differently than the real call_VM
// return address and rbp are already in place
// Set up last_Java_sp and last_Java_fp
// Call runtime
}
}
BLOCK_COMMENT("call runtime_entry");
// Generate oop map
__ reset_last_Java_frame(true, true);
// check for pending exceptions
#ifdef ASSERT
Label L;
#endif // ASSERT
// codeBlob framesize is in words (not VMRegImpl::slot_size)
&code,
oop_maps, false);
return stub->entry_point();
}
// Initialization
void generate_initial() {
// Generates all stubs and initializes the entry points
// This platform-specific stub is needed by generate_call_stub()
// entry points that exist in all platforms Note: This is code
// that could be shared among different platforms - however the
// benefit seems to be smaller than the disadvantage of having a
// much more complicated generator structure. See also comment in
// stubRoutines.hpp.
// is referenced by megamorphic call
// atomic calls
// platform dependent
// Build this early so it's available for the interpreter.
generate_throw_exception("StackOverflowError throw_exception",
}
void generate_all() {
// Generates all stubs and initializes the entry points
// These entry points require SharedInfo::stack0 to be set up in
// non-core builds and need to be relocatable, so they each
// fabricate a RuntimeStub internally.
generate_throw_exception("AbstractMethodError throw_exception",
generate_throw_exception("IncompatibleClassChangeError throw_exception",
generate_throw_exception("NullPointerException at call throw_exception",
// entry points that are platform specific
// support for verify_oop (must happen after universe_init)
// arraycopy stubs used by compilers
// don't bother generating these AES intrinsic stubs unless global flag is set
if (UseAESIntrinsics) {
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
}
}
public:
if (all) {
generate_all();
} else {
}
}
}; // end class declaration
}