/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "assembler_x86.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_x86.hpp"
#include "oops/instanceOop.hpp"
#include "oops/methodOop.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#ifdef TARGET_OS_FAMILY_linux
# include "thread_linux.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_solaris
# include "thread_solaris.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_windows
# include "thread_windows.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_bsd
# include "thread_bsd.inline.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp
#ifdef PRODUCT
#else
#endif
// -------------------------------------------------------------------------------------------------------------------------
// Stub Code definitions
// pc is the instruction which we must emulate
// doing a no-op is fine: return garbage from the load
// therefore, compute npc
// request an async exception
// return address of next instruction to execute
return npc;
}
private:
#ifdef PRODUCT
#else
void inc_counter_np_(int& counter) {
}
#define inc_counter_np(counter) \
#endif //PRODUCT
#ifndef PRODUCT
switch (t) {
}
#endif //PRODUCT
}
//------------------------------------------------------------------------------------------------------------------------
// Call stubs are used to call Java from C
//
// [ return_from_Java ] <--- rsp
// [ argument word n ]
// ...
// -N [ argument word 1 ]
// -7 [ Possible padding for stack alignment ]
// -6 [ Possible padding for stack alignment ]
// -5 [ Possible padding for stack alignment ]
// -4 [ mxcsr save ] <--- rsp_after_call
// -3 [ saved rbx, ]
// -2 [ saved rsi ]
// -1 [ saved rdi ]
// 0 [ saved rbp, ] <--- rbp,
// 1 [ return address ]
// 2 [ ptr. to call wrapper ]
// 3 [ result ]
// 4 [ result_type ]
// 5 [ method ]
// 6 [ entry_point ]
// 7 [ parameters ]
// 8 [ parameter_size ]
// 9 [ thread ]
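// For reference, the C-side caller reaches this stub through a function
// pointer whose parameters correspond to the incoming argument slots above
// (see stubRoutines.hpp; shown here only as a hedged reminder):
//
//   typedef void (*CallStub)(address        link,            // call wrapper
//                            intptr_t*      result,
//                            BasicType      result_type,
//                            methodOopDesc* method,
//                            address        entry_point,
//                            intptr_t*      parameters,
//                            int            size_of_parameters,
//                            TRAPS);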
// stub code parameters / addresses
bool sse_save = false;
// stub code
// save rdi, rsi, & rbx, according to C calling conventions
// save and initialize %mxcsr
if (sse_save) {
}
// make sure the control word is correct.
#ifdef ASSERT
// make sure we have no pending exceptions
{ Label L;
}
#endif
// pass parameters if any
BLOCK_COMMENT("pass parameters if any");
// parameter passing loop
// Copy Java parameters in reverse order (receiver last)
// Note that the argument order is inverted in the process
// source is rdx[rcx: N-1..0]
// dest is rsp[rbx: 0..N-1]
// get parameter
// call Java function
BLOCK_COMMENT("call Java function");
BLOCK_COMMENT("call_stub_return_address:");
#ifdef COMPILER2
{
if (UseSSE >= 2) {
} else {
for (int i = 1; i < 8; i++) {
}
// UseSSE <= 1 so double result should be left on TOS
if (UseSSE == 0) {
// UseSSE == 0 so float result should be left on TOS
}
}
}
#endif // COMPILER2
// store result depending on type
// (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
// handle T_INT case
// check that FPU stack is empty
// pop parameters
// restore %mxcsr
if (sse_save) {
}
// restore rdi, rsi and rbx,
// return
// handle return types different from T_INT
// interpreter uses xmm0 for return values
if (UseSSE >= 1) {
} else {
}
// interpreter uses xmm0 for return values
if (UseSSE >= 2) {
} else {
}
return start;
}
//------------------------------------------------------------------------------------------------------------------------
// Return point for a Java call if there's an exception thrown in Java code.
// The exception is caught and transformed into a pending exception stored in
// JavaThread that can be tested from within the VM.
//
// Note: Usually the parameters are removed by the callee. In case of an exception
// crossing an activation frame boundary, that is not the case if the callee
// is compiled code => we need to set up rsp.
//
// rax,: exception oop
// get thread directly
#ifdef ASSERT
// verify that threads correspond
{ Label L;
}
#endif
// set pending exception
// complete return to VM
assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
return start;
}
//------------------------------------------------------------------------------------------------------------------------
// Continuation point for runtime calls returning with a pending exception.
// The pending exception check happened in the runtime or native call stub.
// The pending exception in Thread is converted into a Java-level exception.
//
// Contract with Java-level exception handlers:
// rax: exception
// rdx: throwing pc
//
// NOTE: At entry of this stub, exception-pc must be on stack !!
// other registers used in this stub
// Upon entry, the sp points to the return address returning into Java
// (interpreted or compiled) code; i.e., the return address becomes the
// throwing pc.
//
// Arguments pushed before the runtime call are still on the stack but
// the exception handler will reset the stack pointer -> ignore them.
// A potential result in registers can be ignored as well.
#ifdef ASSERT
// make sure this code is only executed if there is a pending exception
{ Label L;
}
#endif
// compute exception handler into rbx,
BLOCK_COMMENT("call exception_handler_for_return_address");
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, exception_pc);
// setup rax & rdx, remove return address & clear pending exception
#ifdef ASSERT
// make sure exception is set
{ Label L;
}
#endif
// Verify that there is really a valid exception in RAX.
// continue at exception handler (return address removed)
// rax: exception
// rbx: exception handler
// rdx: throwing pc
return start;
}
//----------------------------------------------------------------------------------------------------
// Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest)
//
// xchg exists as far back as 8086, lock needed for MP only
// Stack layout immediately after call:
//
// 0 [ret addr ] <--- rsp
// 1 [ ex ]
// 2 [ dest ]
//
// Result: *dest <- ex, return (old *dest)
//
// Note: win32 does not currently use this code
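// A hedged sketch of what the generated code amounts to (register choice
// and exact addressing are assumptions, not the emitted stub):
//
//   __ movl(rax, Address(rsp, 1 * wordSize));  // exchange_value
//   __ movl(rcx, Address(rsp, 2 * wordSize));  // dest
//   __ xchgl(rax, Address(rcx, 0));            // implicitly locked on MP
//   __ ret(0);                                 // old *dest is returned in rax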
return start;
}
//----------------------------------------------------------------------------------------------------
// Support for void verify_mxcsr()
//
// This routine is used with -Xcheck:jni to verify that native
// JNI code does not return to Java code without restoring the
// MXCSR register to our expected state.
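// Hedged sketch of the check performed below (the exact comparison and the
// accessor for the expected value are assumptions): read the current MXCSR
// with __ stmxcsr(...) into a stack slot, compare it against the standard
// value established in create_control_words(), and if it differs emit a
// warning and restore it with __ ldmxcsr(...) before returning.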
if (CheckJNICalls && UseSSE > 0 ) {
}
return start;
}
//---------------------------------------------------------------------------
// Support for void verify_fpu_cntrl_wrd()
//
// This routine is used with -Xcheck:jni to verify that native
// JNI code does not return to Java code without restoring the
// FP control word to our expected state.
if (CheckJNICalls) {
}
return start;
}
//---------------------------------------------------------------------------
// Wrapper for slow-case handling of double-to-integer conversion
// d2i or f2i fast case failed either because it is NaN or because the value
// is out of range for the target type
// Input: FPU TOS: float value
// Output: rax, (rdx): integer (long) result
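// The C helpers reached from here implement the Java conversion semantics
// (a reminder, not stub code): SharedRuntime::d2i returns 0 for NaN and
// saturates to min/max jint for out-of-range values; SharedRuntime::d2l
// does the same for jlong.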
// Capture info about frame layout
};
// Save outgoing argument to stack across push_FPU_state()
// Save CPU & FPU state
__ push_FPU_state();
// push_FPU_state() resets the FP top of stack
// Load original double into FP top of stack
// Store double into stack as outgoing argument
// Prepare FPU for doing math in C-land
// Call the C code to massage the double. Result in EAX
if (t == T_INT)
{ BLOCK_COMMENT("SharedRuntime::d2i"); }
else if (t == T_LONG)
{ BLOCK_COMMENT("SharedRuntime::d2l"); }
// Restore CPU & FPU state
__ pop_FPU_state();
return start;
}
//---------------------------------------------------------------------------
// The following routine generates a subroutine to throw an asynchronous
// UnknownError when an unsafe access gets a fault that could not be
// reasonably prevented by the programmer.
BLOCK_COMMENT("call handle_unsafe_access");
return start;
}
//----------------------------------------------------------------------------------------------------
// Non-destructive plausibility checks for oops
// Incoming arguments on stack after saving rax,:
//
// [tos ]: saved rdx
// [tos + 1]: saved EFLAGS
// [tos + 2]: return address
// [tos + 3]: char* error message
// [tos + 4]: oop object to verify
// [tos + 5]: saved rax, - saved by caller and bashed
// make sure object is 'reasonable'
// Check if the oop is in the right area of memory
// make sure klass is 'reasonable'
// Check if the klass is in the right area of memory
// make sure klass' klass is 'reasonable'
// if the klass' klass is not in the right area of memory, the oop is broken too.
// return if everything seems ok
// handle errors
BLOCK_COMMENT("call MacroAssembler::debug");
return start;
}
//
// Generate pre-barrier for array stores
//
// Input:
// start - starting address
// count - element count
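//
// For the G1 cases below, the work boils down to a leaf call into the
// runtime that records the pre-values for the whole region, roughly
// (hedged sketch; exact entry point and argument marshalling assumed):
//
//   __ call_VM_leaf(CAST_FROM_FN_PTR(address,
//                   BarrierSet::static_write_ref_array_pre), start, count);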
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
// With G1, don't generate the call if we statically know that the target is uninitialized
if (!uninitialized_target) {
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
case BarrierSet::ModRef:
break;
default :
}
}
//
// Generate a post-barrier for an array store
//
// start - starting address
// count - element count
//
// The two input registers are overwritten.
//
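// For the card-table cases below the effect is (hedged pseudocode, with
// byte_map_base, card_shift and dirty_card_val() as in CardTableModRefBS):
//
//   for (addr = start; addr < start + count*oopSize; addr += card_size)
//     byte_map_base[uintptr_t(addr) >> card_shift] = dirty_card_val();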
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
}
break;
case BarrierSet::ModRef:
break;
default :
}
}
// Copy 64-byte chunks
//
// Inputs:
// from - source array address
// to_from - destination array address - from
// qword_count - 8-byte element count, negative
//
// Copy 64-byte chunks
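// One iteration of the unaligned-SSE path looks roughly like this
// (hedged sketch; register choice is an assumption):
//   __ movdqu(xmm0, Address(from, 0));
//   __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
//   ... repeated for offsets 16, 32 and 48, then __ addptr(from, 64);
// The AVX2 path does the same with two 32-byte vmovdqu pairs.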
if (UseUnalignedLoadStores) {
if (UseAVX >= 2) {
} else {
}
} else {
}
// clean upper bits of YMM registers
__ vzeroupper();
}
//
// length is too short, just copy qwords
//
}
// Copy 64-byte chunks
//
// Inputs:
// from - source array address
// to_from - destination array address - from
// qword_count - 8-byte element count, negative
//
// Copy 64-byte chunks
//
// length is too short, just copy qwords
//
}
bool dest_uninitialized = false) {
BLOCK_COMMENT("Entry:");
}
if (t == T_OBJECT) {
}
// align source address at 4 bytes address boundary
if (t == T_BYTE) {
// One-byte misalignment happens only for byte arrays
}
// Two-byte misalignment happens only for byte and short (char) arrays
}
if (!VM_Version::supports_mmx()) {
} else {
if (!UseUnalignedLoadStores) {
// align to 8 bytes, we know we are 4 byte aligned to start
}
//
// Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
//
if (UseXMMForArrayCopy) {
} else {
}
}
// copy trailing dword
// copy trailing word
if (t == T_BYTE) {
// copy trailing byte
} else {
}
} else {
}
if (t == T_OBJECT) {
}
return start;
}
BLOCK_COMMENT("Entry:");
return start;
}
bool dest_uninitialized = false) {
BLOCK_COMMENT("Entry:");
}
// nooverlap_target expects arguments in rsi and rdi.
// arrays overlap test: dispatch to disjoint stub if necessary.
if (t == T_OBJECT) {
}
// copy from high to low
// Align the end of destination array at 4 bytes address boundary
if (t == T_BYTE) {
// One-byte misalignment happens only for byte arrays
}
// Two-byte misalignment happens only for byte and short (char) arrays
}
if (!VM_Version::supports_mmx()) {
} else {
// Align to 8 bytes the end of array. It is aligned to 4 bytes already.
// Move 8 bytes
if (UseXMMForArrayCopy) {
} else {
}
if (!UseXMMForArrayCopy) {
}
}
// copy prefix qword
// copy prefix dword
if (t == T_BYTE) {
// copy prefix byte
} else {
}
} else {
}
if (t == T_OBJECT) {
}
return start;
}
BLOCK_COMMENT("Entry:");
if (VM_Version::supports_mmx()) {
if (UseXMMForArrayCopy) {
} else {
}
} else {
}
return start;
}
BLOCK_COMMENT("Entry:");
// arrays overlap test
if (VM_Version::supports_mmx()) {
if (UseXMMForArrayCopy) {
} else {
}
} else {
}
}
return start;
}
// Helper for generating a dynamic type check.
// The sub_klass must be one of {rbx, rdx, rsi}.
// The temp is killed.
BLOCK_COMMENT("type_check:");
// The following is a strange variation of the fast path which requires
// one less register, because needed values are on the argument stack.
// __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp,
// L_success, L_failure, NULL);
// if the pointers are equal, we are done (e.g., String[] elements)
// check the supertype display:
// if it was a primary super, we can just fail immediately
// The repne_scan instruction uses fixed registers, which will get spilled.
// We happen to know this works best when super_klass is in rax.
}
//
// Generate checkcasting array copy stub
//
// Input:
// 4(rsp) - source array address
// 8(rsp) - destination array address
// 12(rsp) - element count, can be zero
// 16(rsp) - size_t ckoff (super_check_offset)
// 20(rsp) - oop ckval (super_klass)
//
// Output:
// rax, == 0 - success
// rax, == -1^K - failure, where K is partial transfer count
//
address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) {
// register use:
// rax, rdx, rcx -- loop control (end_from, end_to, count)
// rdi, rsi -- element access (oop, klass)
// rbx, -- temp
// Load up:
BLOCK_COMMENT("Entry:");
}
//---------------------------------------------------------------
// Assembler stub will be used for this call to arraycopy
// if the two arrays are subtypes of Object[] but the
// destination array type is not equal to or a supertype
// of the source type. Each element must be separately
// checked.
// Loop-invariant addresses. They are exclusive end pointers.
// Loop-variant addresses. They assume post-incremented count < 0.
// Copy from low to high addresses, indexed from the end of each array.
// Empty array: Nothing to do.
// ======== begin loop ========
// (Loop is rotated; its entry is L_load_element.)
// Loop control:
// for (count = -count; count != 0; count++)
// Base pointers src, dst are biased by 8*count, to last element.
// ======== loop entry is here ========
// (Could do a trick here: Remember last successful non-null
// element stored and make a quick oop equality check on it.)
&L_store_element, NULL);
// (On fall-through, we have failed the element type check.)
// ======== end loop ========
// It was a real error; we must depend on the caller to finish the job.
// Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops.
// Emit GC store barriers for the oops we have copied (length_arg + count),
// and report their number to the caller.
// Come here on success only.
// Common exit point (success or failure).
return start;
}
//
// Generate 'unsafe' array copy stub
// Though just as safe as the other stubs, it takes an unscaled
// size_t argument instead of an element count.
//
// Input:
// 4(rsp) - source array address
// 8(rsp) - destination array address
// 12(rsp) - byte count, can be zero
//
// Output:
// rax, == 0 - success
// rax, == -1 - need to call System.arraycopy
//
// Examines the alignment of the operands and dispatches
// to a long, int, short, or byte copy loop.
//
// Load up:
// bump this on entry, not on exit:
return start;
}
// Perform range checks on the proposed arraycopy.
// Smashes src_pos and dst_pos. (Uses them up for temps.)
BLOCK_COMMENT("arraycopy_range_checks:");
// if (src_pos + length > arrayOop(src)->length() ) FAIL;
// if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
BLOCK_COMMENT("arraycopy_range_checks done");
}
//
// Generate generic array copy stubs
//
// Input:
// 4(rsp) - src oop
// 8(rsp) - src_pos
// 12(rsp) - dst oop
// 16(rsp) - dst_pos
// 20(rsp) - element count
//
// Output:
// rax, == 0 - success
// rax, == -1^K - failure, where K is partial transfer count
//
}
// Short-hop target to L_failed. Makes for denser prologue code.
// bump this on entry, not on exit:
// Input values
//-----------------------------------------------------------------------
// Assembler stub will be used for this call to arraycopy
// if the following conditions are met:
//
// (1) src and dst must not be null.
// (2) src_pos must not be negative.
// (3) dst_pos must not be negative.
// (4) length must not be negative.
// (5) src klass and dst klass should be the same and not NULL.
// (6) src and dst should be arrays.
// (7) src_pos + length must not exceed length of src.
// (8) dst_pos + length must not exceed length of dst.
//
// if (src == NULL) return -1;
// if (src_pos < 0) return -1;
// if (dst == NULL) return -1;
// if (dst_pos < 0) return -1;
// if (length < 0) return -1;
// if (src->klass() == NULL) return -1;
#ifdef ASSERT
// assert(src->klass() != NULL);
BLOCK_COMMENT("assert klasses not null");
BLOCK_COMMENT("assert done");
}
#endif //ASSERT
// Load layout helper (32-bits)
//
//  |array_tag|     | header_size | element_type |     |log2_element_size|
// 32        30    24            16              8     2                0
//
// array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
//
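// Decoding sketch (the shift/mask constants live in Klass; shown only as a
// reminder of how the fields above are extracted):
//   int tag   =  lh >> Klass::_lh_array_tag_shift;
//   int hsize = (lh >> Klass::_lh_header_size_shift) & Klass::_lh_header_size_mask;
//   int l2es  = (lh >> Klass::_lh_log2_element_size_shift) & Klass::_lh_log2_element_size_mask;
//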
// Handle objArrays completely differently...
// if (src->klass() != dst->klass()) return -1;
// if (!src->is_Array()) return -1;
// At this point, it is known to be a typeArray (array_tag 0x3).
#ifdef ASSERT
{ Label L;
}
#endif
// typeArrayKlass
//
// src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
// dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
//
// next registers should be set before the jump to corresponding stub
// some of them should be duplicated on stack
BLOCK_COMMENT("scale indexes to element size");
BLOCK_COMMENT("choose copy loop based on element size");
#ifdef ASSERT
#endif
// objArrayKlass
// live at this point: rcx_src_klass, src[_pos], dst[_pos]
// test array classes for subtyping
// Identically typed arrays can be copied without element-wise checks.
// live at this point: rcx_src_klass, dst[_pos], src[_pos]
{
// Handy offsets:
// Before looking at dst.length, make sure dst is also an objArray.
// It is safe to examine both src.length and dst.length.
// (Now src_pos and dst_pos are killed, but not src and dst.)
// We'll need this temp (don't forget to pop it after the type check).
// (On fall-through, we have passed the array type check.)
// Reshuffle arguments so we can call checkcast_arraycopy:
// match initial saves for checkcast_arraycopy
// push(rsi); // already done; see above
// push(rdi); // already done; see above
// push(rbx); // already done; see above
// Marshal outgoing arguments now, freeing registers.
// push rbx, changed the incoming offsets (why not just use rbp,??)
// assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, "");
}
return start;
}
void generate_arraycopy_stubs() {
"arrayof_jbyte_disjoint_arraycopy");
NULL, "arrayof_jbyte_arraycopy");
"jbyte_disjoint_arraycopy");
&entry_jbyte_arraycopy, "jbyte_arraycopy");
"arrayof_jshort_disjoint_arraycopy");
NULL, "arrayof_jshort_arraycopy");
"jshort_disjoint_arraycopy");
&entry_jshort_arraycopy, "jshort_arraycopy");
// Next arrays are always aligned on 4 bytes at least.
"jint_disjoint_arraycopy");
&entry_jint_arraycopy, "jint_arraycopy");
"oop_disjoint_arraycopy");
&entry_oop_arraycopy, "oop_arraycopy");
"oop_disjoint_arraycopy_uninit",
/*dest_uninitialized*/true);
NULL, "oop_arraycopy_uninit",
/*dest_uninitialized*/true);
"jlong_arraycopy");
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
generate_unsafe_copy("unsafe_arraycopy",
generate_generic_copy("generic_arraycopy",
}
void generate_math_stubs() {
{
}
{
}
{
}
{
}
{
}
{
__ exp_with_fallback(0);
}
{
__ pow_with_fallback(0);
}
}
// AES intrinsic stubs
return start;
}
// Utility routine for loading a 128-bit key word in little endian format
// can optionally specify that the shuffle mask is already in an xmmregister
if (xmm_shuf_mask != NULL) {
} else {
}
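// In either branch the load amounts to (hedged sketch):
//   __ movdqu(xmmdst, Address(key, offset));
//   __ pshufb(xmmdst, shuffle_mask);  // reorder bytes of each int for AES-NI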
}
// aesenc using specified key+offset
// can optionally specify that the shuffle mask is already in an xmmregister
void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
}
// aesdec using specified key+offset
// can optionally specify that the shuffle mask is already in an xmmregister
void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
}
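// Both helpers reduce to loading the round key and applying one AES round
// (hedged sketch):
//   load_key(xmmtmp, key, offset, xmm_shuf_mask);
//   __ aesenc(xmmdst, xmmtmp);   // aes_enc_key
//   __ aesdec(xmmdst, xmmtmp);   // aes_dec_key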
// Arguments:
//
// Inputs:
// c_rarg0 - source byte array address
// c_rarg1 - destination byte array address
// c_rarg2 - K (key) in little endian int array
//
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// For encryption, the java expanded key ordering is just what we need
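// Round structure (hedged sketch): XOR the block with round key 0x00, apply
// aesenc with round keys 0x10..0x90, then the keylen-dependent extra rounds
// (none for 128-bit, through 0xb0 for 192-bit, through 0xd0 for 256-bit),
// and finish with aesenclast on the final round key.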
return start;
}
// Arguments:
//
// Inputs:
// c_rarg0 - source byte array address
// c_rarg1 - destination byte array address
// c_rarg2 - K (key) in little endian int array
//
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// for decryption the java expanded key ordering is rotated one position from what we want
// so we start from 0x10 here and hit 0x00 last
// we don't know if the key is aligned, hence not using load-execute form
// for decryption the aesdeclast operation is always on key+0x00
return start;
}
if (saving) {
} else {
// restoring
}
}
// Arguments:
//
// Inputs:
// c_rarg0 - source byte array address
// c_rarg1 - destination byte array address
// c_rarg2 - K (key) in little endian int array
// c_rarg3 - r vector byte array address
// c_rarg4 - input length
//
// and left with the results of the last encryption block
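// The loops below implement the CBC recurrence (hedged summary):
//   r = AES_encrypt(r ^ plaintext[i], key);  out[i] = r;
// with r seeded from the incoming r vector and written back to it on exit.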
// xmm register assignments for the loops below
// first 6 keys preloaded into xmm2-xmm7
handleSOERegisters(true /*saving*/);
// load registers from incoming parameters
// load up xmm regs 2 thru 7 with keys 0-5
offset += 0x10;
}
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// 128 bit code follows here
}
}
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
__ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
handleSOERegisters(false /*restoring*/);
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
// 192-bit code follows here (could be changed to use more xmm registers)
}
}
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
// 256-bit code follows here (could be changed to use more xmm registers)
}
}
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
return start;
}
// CBC AES Decryption.
// In the 32-bit stub, because of the lack of registers, we do not try to parallelize 4 blocks at a time.
//
// Arguments:
//
// Inputs:
// c_rarg0 - source byte array address
// c_rarg1 - destination byte array address
// c_rarg2 - K (key) in little endian int array
// c_rarg3 - r vector byte array address
// c_rarg4 - input length
//
// and left with the results of the last encryption block
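// The loops below implement the CBC decryption recurrence (hedged summary):
//   out[i] = AES_decrypt(cipher[i], key) ^ prev;  prev = cipher[i];
// with prev seeded from the incoming r vector and written back to it on exit.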
// xmm register assignments for the loops below
// first 6 keys preloaded into xmm2-xmm7
handleSOERegisters(true /*saving*/);
// load registers from incoming parameters
// the java expanded key ordering is rotated one position from what we want
// so we start from 0x10 here and hit 0x00 last
// load up xmm regs 2 thru 6 with first 5 keys
offset += 0x10;
}
// inside here, use the rvec register to point to previous block cipher
// with which we xor at the end of each newly decrypted block
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// 128-bit code follows here, parallelized
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
}
for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) { // 128-bit runs up to key offset a0
}
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
__ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object
handleSOERegisters(false /*restoring*/);
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
// 192-bit code follows here (could be optimized to use parallelism)
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
}
for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) { // 192-bit runs up to key offset c0
}
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
// 256-bit code follows here (could be optimized to use parallelism)
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
}
for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0
}
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
return start;
}
public:
// Information about frame layout at time of blocking runtime call.
// Note that we only have to preserve callee-saved registers since
// the compilers are responsible for supplying a continuation point
// if they expect all registers to be preserved.
enum layout {
};
private:
//------------------------------------------------------------------------------------------------------------------------
// Continuation point for throwing of implicit exceptions that are not handled in
// the current activation. Fabricates an exception oop and initiates normal
// exception dispatching in this frame.
//
// Previously the compiler (c2) allowed for callee save registers on Java calls.
// This is no longer true after adapter frames were removed but could possibly
// be brought back in the future if the interpreter code was reworked and it
// was deemed worthwhile. The comment below was left to describe what must
// happen here if callee saves were resurrected. As it stands now this stub
// could actually be a vanilla BufferBlob and have no oopMap at all.
// Since it doesn't make much difference we've chosen to leave it the
// way it was in the callee save days and keep the comment.
// If we need to preserve callee-saved values we need a callee-saved oop map and
// therefore have to make these stubs into RuntimeStubs rather than BufferBlobs.
// If the compiler needs all registers to be preserved between the fault
// point and the exception handler then it must assume responsibility for that in
// AbstractCompiler::continuation_for_implicit_null_exception or
// continuation_for_implicit_division_by_zero_exception. All other implicit
// exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
// either at call sites or otherwise assume that stack unwinding will be initiated,
// so caller saved registers were assumed volatile in the compiler.
// This is an inlined and slightly modified version of call_VM
// which has the ability to fetch the return PC out of
// thread-local storage and also sets up last_Java_sp slightly
// differently than the real call_VM
// pc and rbp, already pushed
// Frame is now completed as far as size and linkage.
// push java thread (becomes first argument of C function)
}
}
// Set up last_Java_sp and last_Java_fp
// Call runtime
BLOCK_COMMENT("call runtime_entry");
// Generate oop map
// restore the thread (cannot use the pushed argument since arguments
// may be overwritten by C code generated by an optimizing compiler);
// however can use the register value directly if it is callee saved.
// check for pending exceptions
#ifdef ASSERT
Label L;
#endif /* ASSERT */
RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false);
return stub->entry_point();
}
void create_control_words() {
// Round to nearest, 53-bit mode, exceptions masked
// Round to zero, 53-bit mode, exceptions masked
// Round to nearest, 24-bit mode, exceptions masked
// Round to nearest, 64-bit mode, exceptions masked
// Round to nearest, 64-bit mode, exceptions masked
// Note: the following two constants are 80-bit values
// layout is critical for correct loading by FPU.
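// For reference, the fields being encoded are: x87 control word bits 0-5 are
// the exception masks, bits 8-9 precision control (00b = 24-bit, 10b = 53-bit,
// 11b = 64-bit) and bits 10-11 rounding control (00b = nearest, 11b = toward
// zero); MXCSR bits 7-12 are the exception masks and bits 13-14 the rounding
// control.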
}
//---------------------------------------------------------------------------
// Initialization
void generate_initial() {
// Generates all stubs and initializes the entry points
//------------------------------------------------------------------------------------------------------------------------
// entry points that exist in all platforms
// Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
// the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
// is referenced by megamorphic call
// platform dependent
// Build this early so it's available for the interpreter
StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
}
void generate_all() {
// Generates all stubs and initializes the entry points
// These entry points require SharedInfo::stack0 to be set up in non-core builds
// and need to be relocatable, so they each fabricate a RuntimeStub internally.
StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));
//------------------------------------------------------------------------------------------------------------------------
// entry points that are platform specific
// support for verify_oop (must happen after universe_init)
// arraycopy stubs used by compilers
// don't bother generating these AES intrinsic stubs unless global flag is set
if (UseAESIntrinsics) {
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
}
}
public:
if (all) {
generate_all();
} else {
}
}
}; // end class declaration
}