/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "assembler_x86.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "interpreter/interpreter.hpp"
#include "oops/compiledICHolderOop.hpp"
#include "prims/jvmtiRedefineClassesTrace.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "vmreg_x86.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
// SimpleRuntimeFrame: single shared description of the frame layout used by
// the simple runtime stubs. It carries no data or behavior of its own; it only
// scopes the `layout` enum.
class SimpleRuntimeFrame {
public:
// Most of the runtime stubs have this simple frame layout.
// This class exists to make the layout shared in one place.
// Offsets are for compiler stack slots, which are jints.
enum layout {
// The frame sender code expects that rbp will be in the "natural" place and
// will override any oopMap setting for it. We must therefore force the layout
// so that it agrees with the frame sender code.
// NOTE(review): no enumerators are present in this enum as it stands --
// confirm that the slot offsets were not dropped.
};
};
// RegisterSaver: describes the register save area (offsets in jint-sized
// compiler stack slots) and declares the routine that emits the code which
// saves the live registers.
class RegisterSaver {
// Capture info about frame layout. Layout offsets are in jint
// units because compiler frame slots are jints.
// DEF_XMM_OFFS(n) expands to two enumerators:
//   xmm<n>_off  = xmm_off + n*16 bytes, expressed in jint slots (BytesPerInt)
//   xmm<n>H_off = the next enumerator value after xmm<n>_off
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
// NOTE(review): xmm_off is used by DEF_XMM_OFFS but is not defined among the
// enumerators visible here -- confirm where it comes from.
enum layout {
DEF_XMM_OFFS(0),
// 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
// The frame sender code expects that rbp will be in the "natural" place and
// will override any oopMap setting for it. We must therefore force the layout
// so that it agrees with the frame sender code.
};
public:
// Emits the register-save sequence and returns the OopMap for the save area.
//   masm                    - assembler used to emit the code
//   additional_frame_words  - extra words the caller will allocate itself
//   total_frame_words       - pointer argument; presumably receives the total
//                             frame size in words (TODO confirm, the store is
//                             not visible here)
//   save_vectors            - defaults to false; under COMPILER2 it gates
//                             saving the upper half of the YMM registers
static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
// Offsets into the register save area
// Used by deoptimization when it is managing result register
// values on its own
// During deoptimization only the result registers need to be restored,
// all the other values have already been extracted.
};
// Saves the live registers and returns the OopMap describing where they were
// saved, so that GC and deoptimization can locate oops and debug-info values
// at the call site (see the comments in the body).
//   masm                    - assembler used to emit the save sequence
//   additional_frame_words  - extra words that the caller allocates itself
//   total_frame_words       - pointer argument; presumably receives the frame
//                             size in words (TODO confirm, no store is visible)
//   save_vectors            - only consulted when COMPILER2 is defined
// NOTE(review): `map` is returned below but is not declared in the statements
// visible in this body -- confirm its declaration was not lost.
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
// Number of extra words needed for vector state; stays 0 unless changed below.
int vect_words = 0;
#ifdef COMPILER2
if (save_vectors) {
// Save upper half of YMM registers
}
#else
#endif
// Always make the frame size 16-byte aligned
// OopMap frame size is in compiler stack slots (jint's) not bytes or words
// The caller will allocate additional_frame_words
// CodeBlob frame size is in words.
// Save registers, fpu state, and flags.
// We assume caller has already pushed the return address onto the
// stack, so rsp is 8-byte aligned here.
// We push rbp twice in this sequence because we want the real rbp
// to be under the return like a normal enter.
if (vect_words > 0) {
}
if (frame::arg_reg_save_area_bytes != 0) {
// Allocate argument register save area
}
// Set an oopmap for the call site. This oopmap will map all
// oop-registers and debug-info registers as callee-saved. This
// will allow deoptimization at this safepoint to find all possible
// debug-info recordings, as well as let GC find all oops.
// rbp location is known implicitly by the frame sender code, needs no oopmap
// and the location where rbp was saved is ignored
// %%% These should all be a waste but we'll keep things as they were for now
if (true) {
// rbp location is known implicitly by the frame sender code, needs no oopmap
}
return map;
}
if (frame::arg_reg_save_area_bytes != 0) {
// Pop arg register save area
}
#ifdef COMPILER2
if (restore_vectors) {
// Restore upper half of YMM registers.
}
#else
#endif
// Recover CPU state
__ pop_CPU_state();
// Get the rbp described implicitly by the calling convention (no oopMap)
}
// Just restore result register. Only used by deoptimization. By
// now any callee save register that needs to be restored to a c2
// caller of the deoptee has been extracted into the vframeArray
// and will be stuffed into the c2i adapter we create for later
// restoration so only result registers need to be restored here.
// Restore fp result register
// Restore integer result register
// Pop all of the register save area off the stack except the return address
}
// Is vector's size (in bytes) bigger than a size saved by default?
return size > 16;
}
// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
// Account for saved rbp and return address
// This should really be in_preserve_stack_slots
}
}
// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp).
// and VMRegImpl::stack0+1 refers to the memory word 4-bytes higher. Register
// up to RegisterImpl::number_of_registers) are the 64-bit
// integer registers.
// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
// units regardless of build. Of course for i486 there is no 64 bit build
// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static jni methods
// with small numbers of arguments without having to shuffle the arguments
// at all. Since we control the java ABI we ought to at least get some
// advantage out of it.
int total_args_passed,
int is_outgoing) {
// Create the mapping between argument positions and
// registers.
};
};
for (int i = 0; i < total_args_passed; i++) {
switch (sig_bt[i]) {
case T_BOOLEAN:
case T_CHAR:
case T_BYTE:
case T_SHORT:
case T_INT:
} else {
stk_args += 2;
}
break;
case T_VOID:
// halves of T_LONG or T_DOUBLE
break;
case T_LONG:
// fall through
case T_OBJECT:
case T_ARRAY:
case T_ADDRESS:
} else {
stk_args += 2;
}
break;
case T_FLOAT:
} else {
stk_args += 2;
}
break;
case T_DOUBLE:
} else {
stk_args += 2;
}
break;
default:
break;
}
}
}
// Patch the callers callsite with entry to compiled code if it exists.
Label L;
// Save the current stack pointer
// Schedule the branch target address early.
// Call into the VM to patch the caller, then jump to compiled callee
// rax isn't live so capture return address while we easily can
// align stack so push_CPU_state doesn't fault
__ push_CPU_state();
// VM needs caller's callsite
// VM needs target method
// This needs to be a long call since we will relocate this adapter to
// the codeBuffer and it may not reach
// Allocate argument register save area
if (frame::arg_reg_save_area_bytes != 0) {
}
// De-allocate argument register save area
if (frame::arg_reg_save_area_bytes != 0) {
}
__ pop_CPU_state();
// restore sp
}
int total_args_passed,
int comp_args_on_stack,
Label& skip_fixup) {
// Before we get into the guts of the C2I adapter, see if we should be here
// at all. We've come from compiled code and are attempting to jump to the
// interpreter, which means the caller made a static call to get here
// (vcalls always get a compiled target if there is one). Check for a
// compiled target. If there is one, we need to patch the caller's call.
// Since all args are passed on the stack, total_args_passed *
// Interpreter::stackElementSize is the space we need. Plus 1 because
// we also account for the return address location since
// we store it first rather than hold it in rax across all the shuffling
// stack is aligned, keep it that way
// Get return address
// set senderSP value
// Store the return address in the expected location
// Now write the args into the outgoing interpreter space
for (int i = 0; i < total_args_passed; i++) {
continue;
}
// offset to start parameters
// Say 4 args:
// i st_off
// 0 32 T_LONG
// 1 24 T_VOID
// 2 16 T_OBJECT
// 3 8 T_BOOL
// - 0 return address
//
// a single slot on a 64 bit vm and it would be silly to break them up, the interpreter
// leaves one slot empty and only stores to a single slot. In this case the
// slot that is occupied is the T_VOID slot. See I said it was confusing.
continue;
}
// memory to memory use rax
// sign extend??
} else {
// Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
// T_DOUBLE and T_LONG use two slots in the interpreter
// ld_off == LSW, ld_off+wordSize == MSW
// st_off == MSW, next_off == LSW
#ifdef ASSERT
// Overwrite the unused slot with known junk
#endif /* ASSERT */
} else {
}
}
} else if (r_1->is_Register()) {
// must be only an int (or less ) so move only 32bits to slot
// why not sign extend??
} else {
// Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
// T_DOUBLE and T_LONG use two slots in the interpreter
#ifdef ASSERT
// Overwrite the unused slot with known junk
#endif /* ASSERT */
} else {
}
}
} else {
// only a float use just part of the slot
} else {
#ifdef ASSERT
// Overwrite the unused slot with known junk
#endif /* ASSERT */
}
}
}
// Schedule the branch target address early.
}
}
int total_args_passed,
int comp_args_on_stack,
// Note: r13 contains the senderSP on entry. We must preserve it since
// we may do a i2c -> c2i transition if we lose a race where compiled
// code goes non-entrant while we get args ready.
// In addition we use r13 to locate all the interpreter args as
// we must align the stack to 16 bytes on an i2c entry else we
// lose alignment we expect in all compiled code and register
// save code can segv when fxsave instructions find improperly
// aligned stack pointer.
// Adapters can be frameless because they do not require the caller
// to perform additional cleanup work, such as correcting the stack pointer.
// An i2c adapter is frameless because the *caller* frame, which is interpreted,
// routinely repairs its own stack pointer (from interpreter_frame_last_sp),
// even if a callee has modified the stack pointer.
// A c2i adapter is frameless because the *callee* frame, which is interpreted,
// routinely repairs its caller's stack pointer (from sender_sp, which is set
// up via the senderSP register).
// In other words, if *either* the caller or callee is interpreted, we can
// get the stack pointer repaired after a call.
// This is why c2i and i2c adapters cannot be indefinitely composed.
// In particular, if a c2i adapter were to somehow call an i2c adapter,
// both caller and callee would be compiled methods, and neither would
// clean up the stack pointer changes performed by the two adapters.
// If this happens, control eventually transfers back to the compiled
// caller, but with an uncorrected stack, causing delayed havoc.
// Pick up the return address
if (VerifyAdapterCalls &&
// assert(Interpreter::contains($return_addr) ||
// StubRoutines::contains($return_addr),
// "i2c adapter must return to an interpreter frame");
L_ok);
L_ok);
L_ok);
}
// Must preserve original SP for loading incoming arguments because
// we need to align the outgoing SP for compiled code.
// in registers, we will occasionally have no stack args.
int comp_words_on_stack = 0;
if (comp_args_on_stack) {
// Sig words on the stack are greater-than VMRegImpl::stack0. Those in
// registers are below. By subtracting stack0, we either get a negative
// number (all values in registers) or the maximum stack slot accessed.
// Convert 4-byte c2 stack slots to words.
comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
// Round up to minimum stack alignment, in wordSize
}
// Ensure compiled code always sees stack at proper alignment
// push the return address and misalign the stack that youngest frame always sees
// as far as the placement of the call instruction
// Put saved SP in another register
// Will jump to the compiled code just as if compiled code was doing it.
// Pre-load the register-jump target early, to schedule it better.
// Now generate the shuffle code. Pick up all register args and move the
// rest through the floating point stack top.
for (int i = 0; i < total_args_passed; i++) {
// Longs and doubles are passed in native word order, but misaligned
// in the 32-bit build.
continue;
}
// Pick up 0, 1 or 2 words from SP+offset.
"scrambled load targets?");
// Load in argument order going down.
// Point to interpreter value (vs. tag)
//
//
//
continue;
}
// Convert stack slot to an SP offset (+ wordSize to account for return address )
// We can use r13 as a temp here because compiled code doesn't need r13 as an input
// and if we end up going thru a c2i because of a miss a reasonable value of r13
// will be generated.
// sign extend???
} else {
//
// We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
// the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
// So we must adjust where to pick up the data to match the interpreter.
//
// Interpreter local[n] == MSW, local[n+1] == LSW however locals
// are accessed as negative so LSW is at LOW address
// ld_off is MSW so get LSW
// st_off is LSW (i.e. reg.first())
}
//
// We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
// the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
// So we must adjust where to pick up the data to match the interpreter.
// this can be a misaligned move
} else {
// sign extend and use a full word?
}
} else {
} else {
}
}
}
// 6243940 We might end up in handle_wrong_method if
// the callee is deoptimized as we race thru here. If that
// happens we don't want to take a safepoint because the
// caller frame will look interpreted and arguments are now
// "compiled" so it is much better to make this transition
// invisible to the stack walking code. Unfortunately if
// we try and find the callee by normal means a safepoint
// is possible. So we stash the desired callee in the thread
// and the vm will find there should this case occur.
// put methodOop where a c2i would expect should we end up there
// only needed because the c2 resolve stubs return methodOop as a result in
// rax
}
// ---------------------------------------------------------------
int total_args_passed,
int comp_args_on_stack,
// -------------------------------------------------------------------------
// Generate a C2I adapter. On entry we know rbx holds the methodOop during calls
// to the interpreter. The args start out packed in the compiled layout. They
// need to be unpacked into the interpreter layout. This will almost always
// require some stack space. We grow the current (compiled) stack, then repack
// the args. We finally end in a jump to the generic interpreter entry point.
// On exit from the interpreter, the interpreter will restore our SP (lest the
// compiled code, which relies solely on SP and not RBP, get sick).
{
// Method might have been compiled since the call site was patched to
// interpreted if that is the case treat it as a miss so we can get
// the call site corrected.
}
}
int total_args_passed) {
// We return the amount of VMRegImpl stack slots we need to reserve for all
// the arguments NOT counting out_preserve_stack_slots.
// NOTE: These arrays will have to change when c1 is ported
#ifdef _WIN64
};
};
#else
};
};
#endif // _WIN64
for (int i = 0; i < total_args_passed; i++) {
switch (sig_bt[i]) {
case T_BOOLEAN:
case T_CHAR:
case T_BYTE:
case T_SHORT:
case T_INT:
#ifdef _WIN64
fp_args++;
// Allocate slots for callee to stuff register args the stack.
stk_args += 2;
#endif
} else {
stk_args += 2;
}
break;
case T_LONG:
// fall through
case T_OBJECT:
case T_ARRAY:
case T_ADDRESS:
#ifdef _WIN64
fp_args++;
stk_args += 2;
#endif
} else {
stk_args += 2;
}
break;
case T_FLOAT:
#ifdef _WIN64
int_args++;
// Allocate slots for callee to stuff register args the stack.
stk_args += 2;
#endif
} else {
stk_args += 2;
}
break;
case T_DOUBLE:
#ifdef _WIN64
int_args++;
// Allocate slots for callee to stuff register args the stack.
stk_args += 2;
#endif
} else {
stk_args += 2;
}
break;
case T_VOID: // Halves of longs and doubles
break;
default:
break;
}
}
#ifdef _WIN64
// windows abi requires that we always allocate enough stack space
// for 4 64bit registers to be stored down.
if (stk_args < 8) {
stk_args = 8;
}
#endif // _WIN64
return stk_args;
}
// On 64 bit we will store integer like items to the stack as
// 64 bits items (sparc abi) even though java would only store
// 32bits for a parameter. On 32bit it will simply be 32 bits
// So this routine will do 32->32 on 32bit and 32->64 on 64bit
// stack to stack
} else {
// stack to reg
}
// reg to stack
// Do we really have to sign extend???
// __ movslq(src.first()->as_Register(), src.first()->as_Register());
} else {
// Do we really have to sign extend???
// __ movslq(dst.first()->as_Register(), src.first()->as_Register());
}
}
}
// stack to stack
} else {
// stack to reg
}
// reg to stack
} else {
}
}
}
// An oop arg. Must pass a handle not the oop itself
int oop_handle_offset,
int framesize_in_slots,
bool is_receiver,
int* receiver_offset) {
// must pass a handle. First figure out the location we use as a handle
// See if oop is NULL if it is we need no handle
// Oop is already on the stack as an argument
if (is_receiver) {
}
// conditionally move a NULL
} else {
// Oop is in a register; we must store it to the space we reserve
// on the stack for oop_handles and pass a handle if oop is non-NULL
int oop_slot;
oop_slot = 0;
oop_slot = 1;
oop_slot = 2;
oop_slot = 3;
oop_slot = 4;
else {
oop_slot = 5;
}
// Store oop in handle area, may be NULL
if (is_receiver) {
}
// conditionally move a NULL from the handle area where it was just stored
}
// If arg is on the stack then place it otherwise it is already in correct reg.
}
}
// A float arg may have to do float reg int reg conversion
// The calling conventions assures us that each VMregpair is either
// all really one physical register or adjacent stack slots.
// This greatly simplifies the cases here compared to sparc.
} else {
// stack to reg
}
// reg to stack
} else {
// reg to reg
// In theory these overlap but the ordering is such that this is likely a nop
}
}
}
// A long move
// The calling conventions assures us that each VMregpair is either
// all really one physical register or adjacent stack slots.
// This greatly simplifies the cases here compared to sparc.
if (src.is_single_phys_reg() ) {
if (dst.is_single_phys_reg()) {
}
} else {
}
} else if (dst.is_single_phys_reg()) {
} else {
}
}
// A double move
// The calling conventions assures us that each VMregpair is either
// all really one physical register or adjacent stack slots.
// This greatly simplifies the cases here compared to sparc.
if (src.is_single_phys_reg() ) {
if (dst.is_single_phys_reg()) {
// In theory these overlap but the ordering is such that this is likely a nop
}
} else {
}
} else if (dst.is_single_phys_reg()) {
} else {
}
}
// We always ignore the frame_slots arg and just use the space just below frame pointer
// which by this time is free to use
switch (ret_type) {
case T_FLOAT:
break;
case T_DOUBLE:
break;
case T_VOID: break;
default: {
}
}
}
// Dispatches on the native method's return type to restore its result value.
//   masm         - assembler for the emitted code
//   ret_type     - BasicType of the result; T_FLOAT, T_DOUBLE and T_VOID are
//                  handled separately, all other types share the default case
//   frame_slots  - ignored (see the comment below)
// NOTE(review): every case body is empty in the code as it stands and masm is
// never referenced -- confirm the restore instructions were not lost.
void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
// We always ignore the frame_slots arg and just use the space just below frame pointer
// which by this time is free to use
switch (ret_type) {
case T_FLOAT:
break;
case T_DOUBLE:
break;
// A void result has nothing to restore.
case T_VOID: break;
default: {
}
}
}
}
}
}
}
}
}
const int stack_slots,
const int total_in_args,
const int arg_save_area,
// if map is non-NULL then the code should store the values,
// otherwise it should load them.
// Save down double word first
for ( int i = 0; i < total_in_args; i++) {
} else {
}
}
}
} else {
}
}
}
// Save or restore single word registers
for ( int i = 0; i < total_in_args; i++) {
slot++;
// Value is in an input register pass we must flush it to the stack
switch (in_sig_bt[i]) {
case T_BOOLEAN:
case T_CHAR:
case T_BYTE:
case T_SHORT:
case T_INT:
} else {
}
break;
case T_ARRAY:
case T_LONG:
// handled above
break;
case T_OBJECT:
default: ShouldNotReachHere();
}
slot++;
} else {
}
}
int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
}
}
}
}
// Check GC_locker::needs_gc and enter the runtime if it's true. This
// keeps a new JNI critical region from starting until a GC has been
// forced. Save down any oops in registers and describe them in an
// OopMap.
int stack_slots,
int total_c_args,
int total_in_args,
int arg_save_area,
// Save down any incoming oops and call into the runtime to halt for a GC
__ reset_last_Java_frame(false, true);
#ifdef ASSERT
if (StressCriticalJNINatives) {
// Stress register saving
// Destroy argument registers
for (int i = 0; i < total_in_args - 1; i++) {
// Nothing to do
} else {
}
i++;
}
}
}
#endif
}
// Unpack an array argument into a pointer to the body and the length
// if the array is non-null, otherwise pass 0 for both.
static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
"possible collision");
"possible collision");
// Pass the length, ptr pair
// Load the arg up from the stack
}
__ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type)));
// load the length relative to the body.
// Pass zeros
}
// Different signatures may require very different orders for the move
// to avoid clobbering other arguments. There's no simple way to
// order them safely. Compute a safe order for issuing stores and
// break any cycles in those stores. This code is fairly general but
// it's not necessary on the other platforms so we keep it in the
// platform dependent code instead of moving it into a shared file.
// (See bugs 7013347 & 7145024.)
// Note that this code is specific to LP64.
friend class ComputeMoveOrder;
private:
int _src_index;
int _dst_index;
bool _processed;
}
public:
, _processed(false) {
}
// insert
// create a new store following the last store
// to move from the temp_register to the original
// break the cycle of links and insert new_store at the end
// break the reverse link.
MoveOperation* p = prev();
// change the original store to save its value in the temp.
}
// link this store in front the store that it depends on
if (n != NULL) {
_next = n;
n->_prev = this;
}
}
};
private:
public:
// Move operations where the dest is the stack can all be
// scheduled first since they can't interfere with the other moves.
c_arg--;
} else {
} else {
}
}
} else {
} else {
}
}
}
// Break any cycles in the register moves and emit them in the
// proper order.
}
}
// Collected all the move operations
}
// Walk the edges breaking cycles between moves. The result list
// can be walked in order to produce the proper set of loads
// Record which moves kill which values
}
"make sure temp isn't in the registers that are killed");
// create links between loads and stores
}
// at this point, all the move operations are chained together
// in a doubly linked list. Processing it backwards finds
// the beginning of the chain, forwards finds the end. If there's
// a cycle it can be broken at any point, so pick an edge and walk
// backward until the list ends or we end where we started.
if (!s->is_processed()) {
// search for the beginning of the chain or cycle
}
}
// walk the chain forward inserting to store list
start->set_processed();
}
}
}
return stores;
}
};
if (VerifyOops) {
for (int i = 0; i < method->size_of_parameters(); i++) {
if (r->is_stack()) {
} else {
}
}
}
}
}
// Now write the args into the outgoing interpreter space
bool has_receiver = false;
if (ref_kind != 0) {
has_receiver = true;
} else {
}
if (member_reg != noreg) {
// Load the member_arg into register, if necessary.
if (r->is_stack()) {
} else {
// no data motion is needed
member_reg = r->as_Register();
}
}
if (has_receiver) {
// Make sure the receiver is loaded into a register.
if (r->is_stack()) {
// Porting note: This assumes that compiled calling conventions always
// pass the receiver oop in a register. If this is not true on some
// platform, pick a temp and load the receiver from stack.
fatal("receiver always in a register");
} else {
// no data motion is needed
receiver_reg = r->as_Register();
}
}
// Figure out which address we are really jumping to:
}
// ---------------------------------------------------------------------------
// Generate a native wrapper for a given method. The method takes arguments
// in the Java compiled code convention, marshals them to the native
// convention (handlizes oops, etc), transitions to native, makes the call,
// returns to java state (possibly blocking), unhandlizes any result and
// returns.
//
// Critical native functions are a shorthand for the use of
// GetPrimitiveArrayCritical and disallow the use of any other JNI
// functions. The wrapper is expected to unpack the arguments before
// passing them to the callee and perform checks before and after the
// native call to ensure that the GC_locker
// lock_critical/unlock_critical semantics are followed. Some other
// parts of JNI setup are skipped, like the tear down of the JNI handle
// block and the check for pending exceptions, since it's impossible for them
// to be thrown.
//
// They are roughly structured like this:
// if (GC_locker::needs_gc())
// SharedRuntime::block_for_jni_critical();
// transition to thread_in_native
// unpack array arguments and call native entry point
// check for safepoint in progress
// check if any thread suspend flags are set
// call into JVM and possibly unlock the JNI critical
// if a GC was suppressed while in the critical native.
// transition back to thread_in_Java
// return to caller
//
int compile_id,
if (method->is_method_handle_intrinsic()) {
in_regs);
in_ByteSize(-1),
in_ByteSize(-1),
}
bool is_critical_native = true;
if (native_func == NULL) {
is_critical_native = false;
}
// An OopMap for lock (and class if static)
// We have received a description of where all the java arg are located
// on entry to the wrapper. We need to convert these args to where
// the jni function will expect them. To figure out where they go
// we convert the java signature to a C signature by inserting
// the hidden arguments as arg[0] and possibly arg[1] (static method)
if (!is_critical_native) {
total_c_args += 1;
total_c_args++;
}
} else {
for (int i = 0; i < total_in_args; i++) {
total_c_args++;
}
}
}
int argc = 0;
if (!is_critical_native) {
}
for (int i = 0; i < total_in_args ; i++ ) {
}
} else {
for (int i = 0; i < total_in_args ; i++ ) {
// Arrays are passed as int, elem* pair
switch (at[1]) {
default: ShouldNotReachHere();
}
}
} else {
in_elem_bt[i] = T_VOID;
}
}
}
}
// Now figure out where the args must be stored and how much stack space
// they require.
int out_arg_slots;
// Compute framesize for the wrapper. We need to handlize all oops in
// incoming registers
// Calculate the total number of stack slots we will need.
// First count the abi requirement plus all of the outgoing args
// Now the space for the inbound oop handle area
if (is_critical_native) {
// Critical natives may have to call out so they need a save area
// for register arguments.
int double_slots = 0;
int single_slots = 0;
for ( int i = 0; i < total_in_args; i++) {
switch (in_sig_bt[i]) {
case T_BOOLEAN:
case T_BYTE:
case T_SHORT:
case T_CHAR:
case T_INT: single_slots++; break;
case T_ARRAY: // specific to LP64 (7145024)
case T_LONG: double_slots++; break;
default: ShouldNotReachHere();
}
switch (in_sig_bt[i]) {
case T_FLOAT: single_slots++; break;
case T_DOUBLE: double_slots++; break;
default: ShouldNotReachHere();
}
}
}
// align the save area
if (double_slots != 0) {
}
}
// Now any space we need for handlizing a klass if static method
int klass_slot_offset = 0;
int lock_slot_offset = 0;
bool is_static = false;
is_static = true;
}
// Plus a lock if needed
if (method->is_synchronized()) {
}
// Now a place (+2) to save return values or temp during shuffling
// + 4 for return address (which we own) and saved rbp
stack_slots += 6;
// Ok The space we have allocated will look like:
//
//
// FP-> | |
// |---------------------|
// | 2 slots for moves |
// |---------------------|
// | lock box (if sync) |
// |---------------------| <- lock_slot_offset
// | klass (if static) |
// |---------------------| <- klass_slot_offset
// | oopHandle area |
// |---------------------| <- oop_handle_offset (6 java arg registers)
// | outbound memory |
// | based arguments |
// | |
// |---------------------|
// | |
// SP-> | out_preserved_slots |
//
//
// Now compute actual number of stack words we need rounding to make
// stack properly aligned.
// First thing make an ic check to see if we should even be here
// We are free to use all registers as temps without saving them and
// restoring them except rbp. rbp is the only callee save register
// as far as the interpreter and the compiler(s) are concerned.
// Verified entry point must be aligned
// The instruction at the verified entry point must be 5 bytes or longer
// because it can be patched on the fly by make_non_entrant. The stack bang
// instruction fits that requirement.
// Generate stack overflow check
if (UseStackBanging) {
} else {
// need a 5 byte instruction to allow MT safe patching to non-entrant
}
// Generate a new frame for the wrapper.
// -2 because return address is already present and so is saved rbp
// Frame is now completed as far as size and linkage.
#ifdef ASSERT
{
Label L;
}
#endif /* ASSERT */
// It is callee save so it survives the call to native
if (is_critical_native) {
}
//
// We immediately shuffle the arguments so that any vm call we have to
// make from here on out (sync slow path, jvmti, etc.) we will have
// captured the oops from our caller and have a valid oopMap for
// them.
// -----------------
// The Grand Shuffle
// The Java calling convention is either equal (linux) or denser (win64) than the
// c calling convention. However, because of the jni_env argument the c calling
// convention always has at least one more (and two for static) arguments than Java.
// Therefore if we move the args from java -> c backwards then we will never have
// a register->register conflict and we don't have to build a dependency graph
// and figure out how to break any cycles.
//
// Record esp-based slot for receiver on stack for non-static methods
// This is a trick. We double the stack slots so we can claim
// the oops in the caller's frame. Since we are sure to have
// more args than the caller doubling is enough to make
// sure we can capture all the incoming oop args from the
// caller.
//
// Mark location of rbp (someday)
// map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
// Use eax, ebx as temporaries during any memory-memory moves we have to do
// All inbound args are referenced based on rbp and all outbound args via rsp.
#ifdef ASSERT
for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
reg_destroyed[r] = false;
}
for ( int f = 0 ; f < XMMRegisterImpl::number_of_registers ; f++ ) {
freg_destroyed[f] = false;
}
#endif /* ASSERT */
// This may iterate in two different directions depending on the
// kind of native it is. The reason is that for regular JNI natives
// the incoming and outgoing registers are offset upwards and for
// critical natives they are offset down.
if (!is_critical_native) {
}
} else {
// Compute a valid move order, using tmp_vmreg to break any cycles
ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
}
if (c_arg == -1) {
// This arg needs to be moved to a temporary
temploc = i;
continue;
} else if (i == -1) {
// Read from the temporary location
i = temploc;
temploc = -1;
}
#ifdef ASSERT
}
}
#endif /* ASSERT */
switch (in_sig_bt[i]) {
case T_ARRAY:
if (is_critical_native) {
c_arg++;
#ifdef ASSERT
}
#endif
break;
}
case T_OBJECT:
((i == 0) && (!is_static)),
break;
case T_VOID:
break;
case T_FLOAT:
break;
case T_DOUBLE:
break;
case T_LONG :
break;
default:
}
}
int c_arg;
// Pre-load a static method's oop into r14. Used both by locking code and
// the normal JNI call code.
if (!is_critical_native) {
// point c_arg at the first arg that is already loaded in case we
// need to spill before we call out
// load oop into a register
__ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
// Now handlize the static class mirror it's known not-null.
// Now get the handle
// store the klass handle as second argument
// and protect the arg if we must spill
c_arg--;
}
} else {
// For JNI critical methods we need to save all registers in save_args.
c_arg = 0;
}
// Change state to native (we save the return address in the thread, since it might not
// be pushed on the stack when we do a stack traversal). It is enough that the pc()
// points into the right code segment. It does not have to be the correct return pc.
// We have all of the arguments setup at this point. We must not touch any register
{
// protect the args we've loaded
}
// RedefineClasses() tracing support for obsolete method entry
// protect the args we've loaded
}
// Lock a synchronized method
// Register definitions used by locking and unlocking
if (method->is_synchronized()) {
// Get the handle (the 2nd argument)
// Get address of the box
// Load the oop from the handle
if (UseBiasedLocking) {
}
// Load immediate 1 into swap_reg %rax
// Load (object->mark() | 1) into swap_reg %rax
// Save (object->mark() | 1) into BasicLock's displaced header
}
// src -> dest iff dest == rax else rax <- dest
// Hmm should this move to the slow path code area???
// Test if the oopMark is an obvious stack pointer, i.e.,
// 1) (mark & 3) == 0, and
// 2) rsp <= mark < rsp + os::pagesize()
// These 3 tests can be done by evaluating the following
// expression: ((mark - rsp) & (3 - os::vm_page_size())),
// assuming both stack pointer and pagesize have their
// least significant 2 bits clear.
// NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
// Save the test result, for recursive case, the result is zero
// Slow path will re-enter here
}
// Finally just about ready to make the JNI call
// get JNIEnv* which is first argument to native
if (!is_critical_native) {
}
// Now set thread in native
// Verify or restore cpu control state after JNI call
// Unpack native results.
switch (ret_type) {
case T_INT : /* nothing to do */ break;
case T_DOUBLE :
case T_FLOAT :
// Result is in xmm0 we'll save as needed
break;
case T_ARRAY: // Really a handle
case T_OBJECT: // Really a handle
break; // can't de-handlize until after safepoint check
case T_VOID: break;
case T_LONG: break;
default : ShouldNotReachHere();
}
// Switch thread to "native transition" state before reading the synchronization state.
// This additional state is necessary because reading and testing the synchronization
// state is not atomic w.r.t. GC, as this scenario demonstrates:
// Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
// VM thread changes sync state to synchronizing and suspends threads for GC.
// Thread A is resumed to finish this native method, but doesn't block here since it
// didn't see any synchronization in progress, and escapes.
if (UseMembar) {
// Force this write out before the read below
} else {
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
}
}
{
Label L;
// Don't use call_VM as it will see a possible pending exception and forward it
// and never return here preventing us from clearing _last_native_pc down below.
// Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
// preserved, so we make the call by hand.
//
if (!is_critical_native) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
}
// Restore any method result value
if (is_critical_native) {
// The call above performed the transition to thread_in_Java so
// skip the transition logic below.
}
}
// change thread state
__ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_disabled);
// native result if any is live
// Unlock
if (method->is_synchronized()) {
// Get locked oop from the handle we passed to jni
if (UseBiasedLocking) {
}
// Simple recursive lock?
// Must save rax if it is live now because cmpxchg must use it
}
// get address of the stack lock
// get old displaced header
// Atomic swap old header if oop still contains the stack lock
}
// slow path re-enters here
}
}
{
}
__ reset_last_Java_frame(false, true);
// Unpack oop result
Label L;
}
if (!is_critical_native) {
// reset handle block
}
// pop our frame
if (!is_critical_native) {
// Any exception pending?
}
// Return
// Unexpected paths are out of line and go here
if (!is_critical_native) {
// forward the exception
// and forward the exception
}
// Slow path locking & unlocking
if (method->is_synchronized()) {
// BEGIN Slow path lock
// has last_Java_frame setup. No exceptions so do vanilla call not call_VM
// args are (oop obj, BasicLock* lock, JavaThread* thread)
// protect the args we've loaded
// Not a leaf but we have last_Java_frame setup as we want
#ifdef ASSERT
{ Label L;
}
#endif
// END Slow path lock
// BEGIN Slow path unlock
// If we haven't already saved the native result we must save it now as xmm registers
// are still exposed.
}
// Save pending exception around call to VM (which contains an EXCEPTION_MARK)
// NOTE that obj_reg == rbx currently
#ifdef ASSERT
{
Label L;
}
#endif /* ASSERT */
}
// END Slow path unlock
} // synchronized
// SLOW PATH Reguard the stack if needed
// and continue
oop_maps);
if (is_critical_native) {
nm->set_lazy_critical_native(true);
}
return nm;
}
#ifdef HAVE_DTRACE_H
// ---------------------------------------------------------------------------
// Generate a dtrace nmethod for a given signature. The method takes arguments
// in the Java compiled code convention, marshals them to the native
// abi and then leaves nops at the position you would expect to call a native
// function. When the probe is enabled the nops are replaced with a trap
// instruction that dtrace inserts and the trace will cause a notification
// to dtrace.
//
// arguments. No other java types are allowed. Strings are converted to utf8
// strings so that from dtrace point of view java strings are converted to C
// strings. There is an arbitrary fixed limit on the total space that a method
// can use for converting the strings. (256 chars per string in the signature).
// So any java string larger than this is truncated.
static bool offsets_initialized = false;
// generate_dtrace_nmethod is guarded by a mutex so we are sure to
// be single threaded in this method.
if (!offsets_initialized) {
offsets_initialized = true;
}
// Fill in the signature array, for the calling-convention call.
// The signature we are going to use for the trap that dtrace will see:
// the receiver is dropped, and a one-slot boxed java/lang/Long object reference
// is converted to a two-slot long (which is why we double the allocation).
int i=0;
int total_strings = 0;
int first_arg_to_pass = 0;
int total_c_args = 0;
// Skip the receiver as dtrace doesn't want to see it
first_arg_to_pass = 1;
}
// We need to convert the java args to where a native (non-jni) function
// would expect them. To figure out where they go we convert the java
// signature to a C signature.
if (s == vmSymbols::java_lang_String()) {
} else if (s == vmSymbols::java_lang_Boolean() ||
s == vmSymbols::java_lang_Character() ||
s == vmSymbols::java_lang_Byte() ||
s == vmSymbols::java_lang_Short() ||
s == vmSymbols::java_lang_Integer() ||
s == vmSymbols::java_lang_Float()) {
} else if (s == vmSymbols::java_lang_Long() ||
s == vmSymbols::java_lang_Double()) {
}
// We convert double to long
// We convert float to int
}
}
// Now get the compiled-Java layout as input arguments
int comp_args_on_stack;
// Now figure out where the args must be stored and how much stack space
// they require (neglecting out_preserve_stack_slots but space for storing
// the 1st six register arguments). It's weird see int_stk_helper.
int out_arg_slots;
// Calculate the total number of stack slots we will need.
// First count the abi requirement plus all of the outgoing args
// Now space for the string(s) we must convert
for (i = 0; i < total_strings ; i++) {
string_locs[i] = stack_slots;
}
// Plus the temps we might need to juggle register args
// regs take two slots each
// + 4 for return address (which we own) and saved rbp,
stack_slots += 4;
// Ok The space we have allocated will look like:
//
//
// FP-> | |
// |---------------------|
// | string[n] |
// |---------------------| <- string_locs[n]
// | string[n-1] |
// |---------------------| <- string_locs[n-1]
// | ... |
// | ... |
// |---------------------| <- string_locs[1]
// | string[0] |
// |---------------------| <- string_locs[0]
// | outbound memory |
// | based arguments |
// | |
// |---------------------|
// | |
// SP-> | out_preserved_slots |
//
//
// Now compute actual number of stack words we need rounding to make
// stack properly aligned.
// First thing make an ic check to see if we should even be here
// We are free to use all registers as temps without saving them and
// restoring them except rbp. rbp, is the only callee save register
// as far as the interpreter and the compiler(s) are concerned.
// verified entry must be aligned for code patching.
// and the first 5 bytes must be in the same cache line
// if we align at 8 then we will be sure 5 bytes are in the same line
// The instruction at the verified entry point must be 5 bytes or longer
// because it can be patched on the fly by make_non_entrant. The stack bang
// instruction fits that requirement.
// Generate stack overflow check
if (UseStackBanging) {
} else {
}
} else {
// need a 5 byte instruction to allow MT safe patching to non-entrant
}
"valid size for make_non_entrant");
// Generate a new frame for the wrapper.
// -4 because return address is already present and so is saved rbp,
}
// Frame is now completed as far as size and linkage.
// State of input register args
bool rax_is_zero = false;
// All args (except strings) destined for the stack are moved first
// Get the real reg value or a dummy (rsp)
// Even though a string arg in a register is still live after this loop
// after the string conversion loop (next) it will be dead so we take
// advantage of that now for simpler code to manage live.
case T_ARRAY:
case T_OBJECT:
{
// need to unbox a one-word value
} else {
rax_is_zero = false;
}
++c_arg; // skip over T_VOID to keep the loop indices in sync
} else {
}
// Convert the arg to NULL
if (!rax_is_zero) {
rax_is_zero = true;
}
}
}
break;
case T_VOID:
break;
case T_FLOAT:
// This does the right thing since we know it is destined for the
// stack
break;
case T_DOUBLE:
// This does the right thing since we know it is destined for the
// stack
break;
case T_LONG :
break;
default:
}
}
}
// If we have any strings we must store any register based arg to the stack
// This includes any still live xmm registers too.
int sid = 0;
if (total_strings > 0 ) {
// string oops were left untouched by the previous loop even if the
// eventual (converted) arg is destined for the stack so park them
// away now (except for first)
if (sid != 1) {
// The first string arg won't be killed until after the utf8
// conversion
}
// Convert the xmm register to an int and store it in the reserved
// location for the eventual c register arg
} else {
}
} else {
// If the arg is an oop type we don't support don't bother to store
// it remember string was handled above.
if (!useless) {
}
}
}
}
++c_arg; // skip over T_VOID to keep the loop indices in sync
}
}
// Now that the volatile registers are safe, convert all the strings
sid = 0;
// It's a string
// The first string we find might still be in the original java arg
// register
// We will need to eventually save the final argument to the trap
// in the non-volatile location dedicated to src. This is the offset
// from fp we will use.
// This is where the argument will eventually reside
if (sid == 1) {
} else {
}
} else {
// arg is still in the original location
}
// see if the oop is NULL
// Save the ptr to utf string in the original src loc or the tmp
// dedicated to it
} else {
}
} else {
}
// And do the conversion
}
++c_arg; // skip over T_VOID to keep the loop indices in sync
}
}
// The get_utf call killed all the c_arg registers
}
// Now we can finally move the register args to their desired locations
rax_is_zero = false;
// Only need to look for args destined for the integer registers (since we
// convert float/double args to look like int/long outbound)
// Check if the java arg is unsupported and therefore useless
// If we're going to kill an existing arg save it first
// you can't kill yourself
}
}
if (!useless) {
}
}
} else {
// If the arg is an oop type we don't support don't bother to store
// it
if (!useless) {
} else {
}
}
}
} else if (!useless) {
// full sized move even for int should be ok
}
// At this point r has the original java arg in the final location
// (assuming it wasn't useless). If the java arg was an oop
// we have a bit more to do
// need to unbox a one-word value
} else {
}
// Convert the arg to NULL
}
}
// dst can no longer be holding an input value
}
++c_arg; // skip over T_VOID to keep the loop indices in sync
}
}
// Ok now we are done. Need to place the nop that dtrace wants in order to
// patch in the trap
// Return
return nm;
}
#endif // HAVE_DTRACE_H
// this function returns the adjust size (in number of words) to a c2i adapter
// activation for use during deoptimization
}
return 0;
}
//------------------------------generate_deopt_blob----------------------------
// Allocate space for the code
// Setup code generation tools
int frame_size_in_words;
// -------------
// This code enters when returning to a de-optimized nmethod. A return
// address has been pushed on the stack, and return values are in
// registers.
// If we are doing a normal deopt then we were called from the patched
// nmethod from the point we returned to the nmethod. So the return
// address on the stack is wrong by NativeCall::instruction_size
// We will adjust the value so it looks like we have the original return
// address on the stack (like when we eagerly deoptimized).
// In the case of an exception pending when deoptimizing, we enter
// with a return address on the stack that points after the call we patched
// into the exception handler. We have the following register state from,
// e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
// rax: exception oop
// rbx: exception handler
// rdx: throwing pc
// So in this case we simply jam rdx into the useless return address and
// the stack looks just like we want.
//
// At this point we need to de-opt. We save the argument return
// registers. We call the first C routine, fetch_unroll_info(). This
// routine captures the return values and returns a structure which
// describes the current frame size and the sizes of all replacement frames.
// The current frame is compiled code and may contain many inlined
// functions, each with their own JVM state. We pop the current frame, then
// push all the new frames. Then we call the C routine unpack_frames() to
// populate these frames. Finally unpack_frames() returns us the new target
// address. Notice that callee-save registers are BLOWN here; they have
// already been captured in the vframeArray at the time the return PC was
// patched.
// Prolog for non exception case!
// Save everything in sight.
// Normal deoptimization. Save exec mode for unpack_frames.
// Reexecute case
// return address is the pc that describes what bci to re-execute at
// No need to update map as each call to save_live_registers will produce identical oopmap
// Prolog for exception case
// all registers are dead at this entry point, except for rax, and
// rdx which contain the exception oop and exception pc
// respectively. Set them in TLS and fall thru to the
// unpack_with_exception_in_tls entry point.
// new implementation because exception oop is now passed in JavaThread
// Prolog for exception case
// All registers must be preserved because they might be used by LinearScan
// Exception oop and throwing PC are passed in JavaThread
// tos: stack at point of call to method that threw the exception (i.e. only
// args are on the stack, no return address)
// make room on stack for the return address
// It will be patched later with the throwing pc. The correct value is not
// available now because loading it from memory would destroy registers.
// Save everything in sight.
// Now it is safe to overwrite any register
// Deopt during an exception. Save exec mode for unpack_frames.
// load throwing pc from JavaThread and patch it as the return address
// of the current frame. Then clear the field in JavaThread
#ifdef ASSERT
// verify that there is really an exception oop in JavaThread
// verify that there is no pending exception
#endif
// Call C code. Need thread and this frame, but NOT official VM entry
// crud. We cannot block on this call, no GC can happen.
//
// UnrollBlock* fetch_unroll_info(JavaThread* thread)
// fetch_unroll_info needs to call last_java_frame().
#ifdef ASSERT
{ Label L;
(int32_t)0);
}
#endif // ASSERT
// Need to have an oopmap that tells fetch_unroll_info where to
// find any register it might need.
__ reset_last_Java_frame(false, false);
// Load UnrollBlock* into rdi
// QQQ this is useless it was NULL above
// Overwrite the result registers with the exception results.
// I think this is useless
// Only register save data is on the stack.
// Now restore the result registers. Everything else is either dead
// or captured in the vframeArray.
// All of the register save area has been popped off the stack. Only the
// return address remains.
//
// Frame picture (youngest to oldest)
// 1: self-frame (no frame link)
// 2: deopting frame (no frame link)
// 3: caller of deopting frame (could be compiled/interpreted).
//
// Note: by leaving the return address of self-frame on the stack
// and using the size of frame 2 to adjust the stack
// when we are done the return to frame 3 will still be on the stack.
// Pop deoptimized frame
__ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
// rsp should be pointing at the return address to the caller (3)
// Stack bang to make sure there's enough room for these interpreter frames.
if (UseStackBanging) {
}
// Load address of array of frame pcs into rcx
// Trash the old pc
// Load address of array of frame sizes into rsi
// Load counter into rdx
// Pick up the initial fp we should save
// Now adjust the caller's stack to make up for the extra locals
// but record the original sp so that we can save it in the skeletal interpreter
// frame and the stack walking of interpreter_sender will get the unextended sp
// value and not the "real" sp value.
// Push interpreter frames in a loop
#ifdef CC_INTERP
#ifdef ASSERT
#else /* ASSERT */
#endif /* ASSERT */
#else
#endif // CC_INTERP
#ifdef CC_INTERP
#else /* CC_INTERP */
// This value is corrected by layout_activation_impl
__ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable
#endif /* CC_INTERP */
// Re-push self-frame
// Allocate a full sized register save area.
// Return address and rbp are in place, so we allocate two less words.
// Restore frame locals after moving the frame
// Call C code. Need thread but NOT official VM entry
// crud. We cannot block on this call, no GC can happen. Call should
// restore return values to their stack-slots with the new SP.
//
// void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
// Use rbp because the frames look interpreted now
// Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
// Don't need the precise return PC here, just precise enough to point into this code blob.
// Revert SP alignment after call since we're going to do some SP relative addressing below
// Set an oopmap for the call site
// Use the same PC we used for the last java frame
new OopMap( frame_size_in_words, 0 ));
// Clear fp AND pc
__ reset_last_Java_frame(true, true);
// Collect return values
// I think this is useless (throwing pc?)
// Pop self-frame.
// Jump to interpreter
// Make sure all code is generated
_deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
}
#ifdef COMPILER2
//------------------------------generate_uncommon_trap_blob--------------------
// Allocate space for the code
// Setup code generation tools
// Push self-frame. We get here with a return address on the
// stack, so rsp is 8-byte aligned until we allocate our frame.
// No callee saved registers. rbp is assumed implicitly saved
// compiler left unloaded_class_index in j_rarg0 move to where the
// runtime expects it.
// Call C code. Need thread but NOT official VM entry
// crud. We cannot block on this call, no GC can happen. Call should
// capture callee-saved registers as well as return values.
// Thread is in rdi already.
//
// UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index);
// Set an oopmap for the call site
// location of rbp is known implicitly by the frame sender code
__ reset_last_Java_frame(false, false);
// Load UnrollBlock* into rdi
//
// Frame picture (youngest to oldest)
// 1: self-frame (no frame link)
// 2: deopting frame (no frame link)
// 3: caller of deopting frame (could be compiled/interpreted).
// Pop self-frame. We have no frame, and must rely only on rax and rsp.
// Pop deoptimized frame (int)
// rsp should be pointing at the return address to the caller (3)
// Stack bang to make sure there's enough room for these interpreter frames.
if (UseStackBanging) {
}
// Load address of array of frame pcs into rcx (address*)
// Trash the return pc
// Load address of array of frame sizes into rsi (intptr_t*)
// Counter
number_of_frames_offset_in_bytes())); // (int)
// Pick up the initial fp we should save
// Now adjust the caller's stack to make up for the extra locals but
// record the original sp so that we can save it in the skeletal
// interpreter frame and the stack walking of interpreter_sender
// will get the unextended sp value and not the "real" sp value.
caller_adjustment_offset_in_bytes())); // (int)
// Push interpreter frames in a loop
#ifdef CC_INTERP
#else // CC_INTERP
// This value is corrected by layout_activation_impl
#endif // CC_INTERP
// Re-push self-frame
// Prolog
// Use rbp because the frames look interpreted now
// Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
// Don't need the precise return PC here, just precise enough to point into this code blob.
// Call C code. Need thread but NOT official VM entry
// crud. We cannot block on this call, no GC can happen. Call should
// restore return values to their stack-slots with the new SP.
// Thread is in rdi already.
//
// BasicType unpack_frames(JavaThread* thread, int exec_mode);
// Set an oopmap for the call site
// Use the same PC we used for the last java frame
// Clear fp AND pc
__ reset_last_Java_frame(true, true);
// Pop self-frame.
// Jump to interpreter
// Make sure all code is generated
}
#endif // COMPILER2
//------------------------------generate_handler_blob------
//
// Generate a special Compile2Runtime blob that saves all registers,
// and setup oopmap.
//
"must be generated before");
// Allocate space for the code. Setup code generation tools.
int frame_size_in_words;
// Make room for return address (or push it again)
if (!cause_return) {
}
// Save registers, fpu state, and flags
// The following is basically a call_VM. However, we need the precise
// address of the call in order to generate an oopmap. Hence, we do all the
// work ourselves.
// The return address must always be correct so that frame constructor never
// sees an invalid pc.
if (!cause_return) {
// overwrite the dummy value we pushed on entry
}
// Do the call
// Set an oopmap for the call site. This oopmap will map all
// oop-registers and debug-info registers as callee-saved. This
// will allow deoptimization at this safepoint to find all possible
// debug-info recordings, as well as let GC find all oops.
__ reset_last_Java_frame(false, false);
// Exception pending
// No exception case
// Normal exit, restore registers and exit.
// Make sure all code is generated
// Fill-out other meta info
}
//
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into vm to find out the proper destination
// of a java call. All the argument registers are live at this point
// but since this is generic code we don't know what they are and the caller
// must do any gc of the args.
//
// allocate space for the code
int frame_size_in_words;
// Set an oopmap for the call site.
// We need this not only for callee-saved registers, but also for volatile
// registers that the compiler might be keeping live across a safepoint.
// rax contains the address we are going to jump to assuming no exception got installed
// clear last_Java_sp
__ reset_last_Java_frame(false, false);
// check for pending exceptions
// get the returned methodOop
// We are back to the original state on entry and ready to go.
// Pending exception after the safepoint
// exception pending => remove activation and forward to exception handler
// -------------
// make sure all code is generated
// return the blob
// frame_size_words or bytes??
return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
}
#ifdef COMPILER2
// This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
//
//------------------------------generate_exception_blob---------------------------
// creates exception blob at the end
// Using exception blob, this code is jumped from a compiled method.
// (see emit_exception_handler in x86_64.ad file)
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
// (i.e. callee save registers) unwind the frame and jump to the
// exception handler for the nmethod if there is no Java level handler
// for the nmethod.
//
// This code is entered with a jmp.
//
// Arguments:
// rax: exception oop
// rdx: exception pc
//
// Results:
// rax: exception oop
// rdx: exception pc in caller or ???
// destination: exception handler of caller
//
// Note: the exception pc MUST be at a call (precise debug information)
// Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved.
//
// Allocate space for the code
// Setup code generation tools
// Exception pc is 'return address' for stack walker
// Save callee-saved registers. See x86_64.ad.
// rbp is an implicitly saved callee saved register (i.e. the calling
// convention will save/restore it in the prolog/epilog). Other than that
// there are no callee save registers now that adapter frames are gone.
// Store exception in Thread object. We cannot pass any arguments to the
// handle_exception call, since we do not want to make any assumption
// about the size of the frame where the exception happened in.
// c_rarg0 is either rdi (Linux) or rcx (Windows).
// This call does all the hard work. It checks if an exception handler
// exists in the method.
// If so, it returns the handler address.
// If not, it prepares for stack-unwinding, restoring the callee-save
// registers of the frame being removed.
//
// address OptoRuntime::handle_exception_C(JavaThread* thread)
// At a method handle call, the stack may not be properly aligned
// when returning with an exception.
// Set an oopmap for the call site. This oopmap will only be used if we
// are unwinding the stack. Hence, all locations will be dead.
// Callee-saved registers will be the same as the frame above (i.e.,
// handle_exception_stub), since they were restored when we got the
// exception.
__ reset_last_Java_frame(false, true);
// Restore callee-saved registers
// rbp is an implicitly saved callee saved register (i.e. the calling
// convention will save/restore it in the prolog/epilog). Other than that
// there are no callee save registers now that adapter frames are gone.
// rax: exception handler
// Restore SP from BP if the exception PC is a MethodHandle call site.
// We have a handler in rax (could be deopt blob).
// Get the exception oop
// Get the exception pc in case we are deoptimized
#ifdef ASSERT
#endif
// Clear the exception oop so GC no longer processes it as a root.
// rax: exception oop
// r8: exception handler
// rdx: exception pc
// Jump to handler
// Make sure all code is generated
// Set exception blob
}
#endif // COMPILER2