/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.inline.hpp"
#include "code/debugInfo.hpp"
#include "code/debugInfoRec.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/oopMap.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/locknode.hpp"
#include "opto/machnode.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
#include "runtime/handles.inline.hpp"
#include "utilities/xmlstream.hpp"
extern uint size_java_to_interp();
extern uint reloc_java_to_interp();
extern uint size_exception_handler();
extern uint size_deopt_handler();
#ifndef PRODUCT
#define DEBUG_ARG(x) , x
#else
#define DEBUG_ARG(x)
#endif
//------------------------------Output-----------------------------------------
// Convert Nodes to instruction bits and pass off to the VM
// RootNode goes
// The number of new nodes (mostly MachNop) is proportional to
// the number of java calls and inner loops which are aligned.
"out of nodes before code generation" ) ) {
return;
}
// Make sure I can find the Start Node
// Replace StartNode with prolog
// Virtual methods need an unverified entry point
if( is_osr_compilation() ) {
if( PoisonOSREntry ) {
// TODO: Should use a ShouldNotReachHereNode...
}
} else {
// Insert unverified entry point
}
}
// Break before main entry point
#ifndef PRODUCT
||(OptoBreakpoint && is_method_compilation())
||(OptoBreakpointOSR && is_osr_compilation())
||(OptoBreakpointC2R && !_method)
#endif
) {
// checking for _method means that OptoBreakpoint does not apply to
// runtime stubs or frame converters
}
// Insert epilogs before every return
if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point?
//_regalloc->set_bad(epilog->_idx); // Already initialized this way.
}
}
}
# ifdef ENABLE_ZAP_DEAD_LOCALS
if ( ZapDeadCompiledLocals ) Insert_zap_nodes();
# endif
blk_starts[0] = 0;
// Initialize code buffer and process short branches.
#ifndef PRODUCT
if (trace_opto_output()) {
n->dump();
}
}
}
#endif
if (failing()) return;
BuildOopMaps();
if (failing()) return;
}
// Determine if we need to generate a stack overflow check.
// Do it if the method is not a stub function and
// has java calls or has frame size > vm_page_size/8.
}
// Determine if we need to generate a register stack overflow check.
// This is only used on architectures which have split register
// and memory stacks (i.e., IA64).
// Bang if the method is not a stub function and has java calls
}
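// Illustrative sketch (not part of the original source; names and the page
// size parameter are hypothetical): the two gating conditions described in
// the comments above, written out as standalone predicates.
static bool needs_stack_bang_sketch(bool is_stub, bool has_java_calls,
                                    int frame_size_in_bytes, int page_size) {
  // Bang unless this is a stub, and only when a call or a large frame could
  // push the stack pointer past the guard pages.
  return !is_stub && (has_java_calls || frame_size_in_bytes > page_size / 8);
}
static bool needs_register_stack_bang_sketch(bool is_stub, bool has_java_calls) {
  // Only relevant on split register/memory stack machines such as IA64.
  return !is_stub && has_java_calls;
}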
# ifdef ENABLE_ZAP_DEAD_LOCALS
// In order to catch compiler oop-map bugs, we have implemented
// a debugging mode called ZapDeadCompilerLocals.
// This mode causes the compiler to insert a call to a runtime routine,
// "zap_dead_locals", right before each place in compiled code
// that could potentially be a gc-point (i.e., a safepoint or oop map point).
// The runtime routine checks that locations mapped as oops are really
// oops, that locations mapped as values do not look like oops,
// and that locations mapped as dead are not used later
// (by zapping them to an invalid address).
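// Illustrative sketch (standalone, hypothetical helper; not HotSpot code):
// the effect of "zapping" -- stack slots the oop map marks as dead are
// overwritten with a poison value so that any stale use of them faults
// immediately.
static void zap_dead_slots_sketch(void** frame_slots, const bool* is_dead, int nof_slots) {
  void* const poison = (void*)0xdeadbeefL;  // hypothetical invalid address
  for (int i = 0; i < nof_slots; i++) {
    if (is_dead[i]) {
      frame_slots[i] = poison;  // a later dereference of a "dead" slot now crashes
    }
  }
}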
bool skip = false;
// Dink with static counts because code without the extra
// runtime calls is MUCH faster for debugging purposes
if ( CompileZapFirst == 0 ) ; // nothing special
else if ( CompileZapFirst == CompiledZap_count() )
warning("starting zap compilation after skipping");
else if ( CompileZapLast == CompiledZap_count() )
warning("about to compile last zap");
++_CompiledZap_count; // counts skipped zaps, too
if ( skip ) return;
return; // no safepoints/oopmaps emitted for calls in stubs, so we don't care
// Insert call to zap runtime stub before every node with an oop map
// Determine if we should insert a zap-a-lot node in output.
// We do that for all nodes that have oopmap info, except for calls
// to allocation. Calls to allocation pass in the old top-of-eden pointer
// and expect the C code to reset it. Hence, there can be no safepoints between
// the inlined-allocation and the call to new_Java, etc.
// We also cannot zap monitor calls, as they must hold the microlock
// during the call to Zap, which also wants to grab the microlock.
if ( insert ) { // it is MachSafePoint
if ( !n->is_MachCall() ) {
insert = false;
} else if ( n->is_MachCall() ) {
) {
insert = false;
}
}
if (insert) {
++j;
}
}
}
}
}
new (this) CallStaticJavaNode( tf,
// We need to copy the OopMap from the site we're zapping at.
// We have to make a copy, because the zap site might not be
// a call site, and zap_dead is a call site.
// Add the cloned OopMap to the zap node
}
//------------------------------is_node_getting_a_safepoint--------------------
// This code duplicates the logic prior to the call of add_safepoint
// below in this file.
if( n->is_MachSafePoint() ) return true;
return false;
}
# endif // ENABLE_ZAP_DEAD_LOCALS
//------------------------------compute_loop_first_inst_sizes------------------
// Compute the size of first NumberOfLoopInstrToAlign instructions at the top
// of a loop. When aligning a loop we need to provide enough instructions
// in cpu's fetch buffer to feed decoders. The loop alignment could be
// avoided if we have enough instructions in fetch buffer at the head of a loop.
// By default, the size is set to 999999 by Block's constructor so that
// a loop will be aligned if the size is not reset here.
//
// Note: Mach instructions could contain several HW instructions
// so the size is estimated only.
//
// The next condition is used to gate the loop alignment optimization.
// Don't align a loop if there are enough instructions at the head of a loop
// or alignment padding is larger than MaxLoopPad. By default, MaxLoopPad
// is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is
// equal to 11 bytes which is the largest address NOP instruction.
// Check the first loop's block which requires an alignment.
// Check subsequent fallthrough blocks if the loop's first
// block(s) does not have enough instructions.
while( inst_cnt > 0 &&
i < last_block &&
!nb->has_successor(b) ) {
i++;
} // while( inst_cnt > 0 && i < last_block )
} // if( b->head()->is_Loop() )
} // for( i <= last_block )
} // if( MaxLoopPad < OptoLoopAlignment-1 )
}
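// Illustrative sketch (hypothetical names, simplified): the gating condition
// described above -- a loop head is only padded for alignment when its first
// instructions do not already fill the fetch buffer and the required padding
// stays within MaxLoopPad.
static bool should_align_loop_sketch(int first_inst_size, int fetch_buffer_size,
                                     int padding, int max_loop_pad) {
  return first_inst_size < fetch_buffer_size && padding <= max_loop_pad;
}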
//----------------------shorten_branches---------------------------------------
// The architecture description provides short branch variants for some long
// branch instructions. Replace eligible long branches with short branches.
// ------------------
// Compute size of each block, method size, and relocation information size
bool has_short_branch_candidate = false;
// Initialize the sizes to 0
code_size = 0; // Size in bytes of generated code
stub_size = 0; // Size in bytes of all stub entries
// Size in bytes of all relocation entries, including those in local stubs.
// Start with 2-bytes of reloc info for the unverified entry point
// Make three passes. The first computes pessimistic blk_starts,
// relative jmp_offset and reloc_size information. The second performs
// short branch substitution using the pessimistic sizing. The
// third inserts nops where needed.
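// Illustrative sketch (not the real implementation; names are hypothetical
// and the model is heavily simplified): pass two as a fixed-point loop over
// per-block jump records. Keeping each jump's offset relative to its block
// start means shrinking one block only requires sliding later block starts.
struct ShortJmpSketch {
  int  block;            // block containing the jump
  int  offset_in_block;  // offset of the jump from its block start
  int  size;             // current encoded size of the jump
  int  target_block;     // block the jump branches to
  bool is_short;         // already using the short form?
};
static void shorten_branches_sketch(int* blk_starts, ShortJmpSketch* jmps, int njmps,
                                    int nblocks, int long_size, int short_size,
                                    int short_reach) {
  bool progress = true;
  while (progress) {                       // fixed point: shrinking one jump may let another fit
    progress = false;
    for (int k = 0; k < njmps; k++) {
      ShortJmpSketch& j = jmps[k];
      if (j.is_short) continue;
      int pc   = blk_starts[j.block] + j.offset_in_block;
      int dist = blk_starts[j.target_block] - (pc + j.size);
      if (dist >= -short_reach && dist <= short_reach) {
        int shrink = long_size - short_size;
        j.size     = short_size;
        j.is_short = true;
        for (int b = j.block + 1; b < nblocks; b++) blk_starts[b] -= shrink;  // later blocks slide up
        progress = true;
      }
    }
  }
}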
// Step one, perform a pessimistic sizing pass.
// During short branch replacement, we store the relative (to blk_starts)
// offset of jump in jmp_offset, rather than the absolute offset of jump.
// This is so that we do not need to recompute sizes of all nodes when
// we compute correct blk_starts in our next sizing pass.
jmp_offset[i] = 0;
jmp_size[i] = 0;
jmp_nidx[i] = -1;
DEBUG_ONLY( jmp_target[i] = 0; )
DEBUG_ONLY( jmp_rule[i] = 0; )
// Sum all instruction sizes to compute block size
// Handle machine instruction nodes
if( mach->is_MachCall() ) {
// This destination address is NOT PC-relative
stub_size += size_java_to_interp();
}
} else if (mach->is_MachSafePoint()) {
// nop to disambiguate the two safepoints.
// ScheduleAndBundle() can rearrange nodes in a block,
// check for all offsets inside this block.
if (last_call_adr >= blk_starts[i]) {
}
}
if (mach->avoid_back_to_back()) {
// Nop is inserted between "avoid back to back" instructions.
// ScheduleAndBundle() can rearrange nodes in a block,
// check for all offsets inside this block.
if (last_avoid_back_to_back_adr >= blk_starts[i]) {
}
}
if (mach->may_be_short_branch()) {
if (!nj->is_MachBranch()) {
#ifndef PRODUCT
#endif
}
jmp_offset[i] = blk_size;
jmp_nidx[i] = j;
has_short_branch_candidate = true;
}
}
// Remember end of call offset
}
// Remember end of avoid_back_to_back offset
}
}
// When the next block starts a loop, we may insert pad NOP
// instructions. Since we cannot know our future alignment,
// assume the worst.
if (i< nblocks-1) {
if (max_loop_pad > 0) {
// If either is the last instruction in this block, bump by
// max_loop_pad in lock-step with blk_size, so sizing
// calculations in subsequent blocks still can conservatively
// detect that it may be the last instruction in this block.
}
}
blk_size += max_loop_pad;
}
}
// Save block size; update total method size
}
// Step two, replace eligible long jumps.
bool progress = true;
while (has_short_branch_candidate && progress) {
progress = false;
has_short_branch_candidate = false;
int adjust_block_start = 0;
#ifdef ASSERT
int j;
// Find the branch; ignore trailing NOPs.
break;
}
#endif
// This requires the TRUE branch target be in succs[0]
if (bnum > i) { // adjust following block's offset
}
// In the following code a nop could be inserted before
// the branch which will increase the backward distance.
if (needs_padding && offset <= 0)
// We've got a winner. Replace this branch.
// Update the jmp_size.
// Conservatively take into account padding between
// avoid_back_to_back branches. Previous branch could be
// converted into avoid_back_to_back branch during next
// rounds.
jmp_offset[i] += nop_size;
}
mach = replacement;
progress = true;
} else {
// The jump distance is not short, try again during next iteration.
has_short_branch_candidate = true;
}
} // (mach->may_be_short_branch())
mach->avoid_back_to_back())) {
}
}
}
#ifdef ASSERT
if (jmp_target[i] != 0) {
tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]);
}
assert(_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset), "Displacement too large for short jmp");
}
}
#endif
// Step 3, compute the offsets of all blocks, will be done in fill_buffer()
// after ScheduleAndBundle().
// ------------------
// Compute size for code buffer
// Relocation records
// Adjust reloc_size to the number of records of relocation info
// Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
// a relocation index.
// The CodeBuffer will expand the locs array if this estimate is too low.
}
//------------------------------FillLocArray-----------------------------------
// Create a bit of debug info and append it to the array. The mapping is from
// Java local or expression stack to constant, register or stack-slot. For
// doubles, insert 2 mappings and return 1 (to tell the caller that the next
// entry has been taken care of and caller should skip it).
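// Illustrative sketch (standalone, hypothetical encoding): the "insert two
// mappings, skip the second half" convention described above, applied to a
// flat array of slot kinds.
static int count_debug_entries_sketch(const int* slot_kind, int nof_slots) {
  // slot_kind[i]: 0 = one-word value, 1 = first word of a double/long
  int entries = 0;
  for (int i = 0; i < nof_slots; i++) {
    entries++;
    if (slot_kind[i] == 1) i++;  // second half already taken care of; caller skips it
  }
  return entries;
}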
static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
// This should never have accepted Bad before
}
return sv;
}
}
// Otherwise..
return NULL;
}
ObjectValue* sv ) {
}
// Old functionality:
// return
// New functionality:
// Assert if the local is not top. In product mode let the new node
// override the old entry.
return;
}
}
// Is it a safepoint scalar object node?
if (local->is_SafePointScalarObject()) {
}
}
return;
}
// Grab the register number for the local
// Record the double as two float registers.
// The register mask for such a value always specifies two adjacent
// float registers, with the lower register number even.
// Normally, the allocation of high and low words to these registers
// is irrelevant, because nearly all operations on register pairs
// (e.g., StoreD) treat them as a single unit.
// Here, we assume in addition that the words in these two registers are
// stored "naturally" (by operations like StoreD and double stores
// within the interpreter) such that the lower-numbered register
// is written to the lower memory address. This may seem like
// a machine dependency, but it is not--it is a requirement on
// the author of the <arch>.ad file to ensure that, for every kind of
// double store, the word in the even single-register is stored to the first
// memory word. (Note that register numbers are completely
// arbitrary, and are not tied to any machine-level encodings.)
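// Illustrative sketch (standalone, not HotSpot code): splitting a double into
// the two 32-bit raw words that would live in an even/odd register pair.
// Which raw word is "first" in memory is exactly the platform detail the
// <arch>.ad file has to get right.
static void split_double_sketch(double d, unsigned int* lo_word, unsigned int* hi_word) {
  union { double d; unsigned int w[2]; } u;  // assumes 32-bit unsigned int
  u.d = d;
  *lo_word = u.w[0];  // half stored at the lower memory address
  *hi_word = u.w[1];  // half stored at the higher memory address
}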
#ifdef _LP64
// Must be restored to the full-width 64-bit stack slot.
}
#else //_LP64
#ifdef SPARC
// For SPARC we have to swap high and low words for
// long values stored in a single-register (g0-g7).
} else
#endif //SPARC
// The convention the interpreter uses is that the second local
// holds the first raw word of the native double representation.
// This is actually reasonable, since locals and stack arrays
// grow downwards in all implementations.
// (If, on some machine, the interpreter's Java locals or stack
// were to grow upwards, the embedded doubles would be word-swapped.)
}
#endif //_LP64
} else {
array->append(new_loc_value( _regalloc, regnum, _regalloc->is_oop(local) ? Location::oop : Location::normal ));
}
return;
}
// No register. It must be constant data.
switch (t->base()) {
ShouldNotReachHere(); // Caller should skip 2nd halves
break;
break;
break;
if (t == TypeNarrowOop::NULL_PTR) {
} else {
array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
}
break;
break;
// A return address (T_ADDRESS).
#ifdef _LP64
// Must be restored to the full-width 64-bit stack slot.
#else
#endif
break;
float f = t->is_float_constant()->getf();
break;
}
#ifdef _LP64
#else
// Repack the double as two jints.
// The convention the interpreter uses is that the second local
// holds the first raw word of the native double representation.
// This is actually reasonable, since locals and stack arrays
// grow downwards in all implementations.
// (If, on some machine, the interpreter's Java locals or stack
// were to grow upwards, the embedded doubles would be word-swapped.)
#endif
break;
}
#ifdef _LP64
#else
// Repack the long as two jints.
// The convention the interpreter uses is that the second local
// holds the first raw word of the native long representation.
// This is actually reasonable, since locals and stack arrays
// grow downwards in all implementations.
// (If, on some machine, the interpreter's Java locals or stack
// were to grow upwards, the embedded doubles would be word-swapped.)
#endif
break;
}
break;
default:
break;
}
}
// Determine if this node starts a bundle
return (_node_bundling_limit > n->_idx &&
}
//--------------------------Process_OopMap_Node--------------------------------
// Handle special safepoint nodes for synchronization
#ifdef ENABLE_ZAP_DEAD_LOCALS
#endif
bool is_method_handle_invoke = false;
bool return_oop = false;
// Add the safepoint in the DebugInfoRecorder
if( !mach->is_MachCall() ) {
} else {
// Is the call a MethodHandle call?
if (mcall->is_MachCallJava()) {
is_method_handle_invoke = true;
}
}
// Check if a call returns an object.
if (mcall->return_value_is_used() &&
return_oop = true;
}
}
// Loop over the JVMState list to add scope information
// Do not skip safepoints with a NULL method, they need monitor info
// Allocate the object pool for scalar-replaced objects -- the map from
// small-integer keys (which can be recorded in the local and ostack
// arrays) to descriptions of the object state.
// Visit scopes from oldest to youngest.
int idx;
// Safepoints that do not have method() set only provide oop-map and monitor info
// to support GC; these do not support deoptimization.
"JVMS local count must match that of the method");
// Add Local and Expression Stack Information
// Insert locals into the locarray
}
// Insert expression stack entries into the exparray
}
// Add in mappings of the monitors
!method->is_synchronized() ||
num_mon > 0 ||
"monitors must always exist for synchronized methods");
// Build the growable array of ScopeValues for exp stack
// Loop over monitors and insert into array
// Grab the node that defines this monitor
// Create ScopeValue for object
if( obj_node->is_SafePointScalarObject() ) {
}
}
} else {
}
} else {
}
}
// We dump the object pool first, since deoptimization reads it in first.
// Build first class objects to pass to scope
// Make method available for all Safepoints
// Describe the scope here
assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI");
// Now we can describe the scope.
debug_info()->describe_scope(safepoint_pc_offset, scope_method, jvms->bci(), jvms->should_reexecute(), is_method_handle_invoke, return_oop, locvals, expvals, monvals);
} // End jvms loop
// Mark the end of the scope set.
}
// A simplified version of Process_OopMap_Node, to handle non-safepoints.
class NonSafepointEmitter {
Compile* C;
int _pending_offset;
void emit_non_safepoint();
public:
this->C = compile;
_pending_offset = 0;
}
if (!C->debug_info()->recording_non_safepoints()) return;
if (_pending_jvms != NULL &&
// Repeated JVMS? Stretch it up here.
} else {
if (_pending_jvms != NULL &&
_pending_offset < pc_offset) {
}
// This is the only way _pending_jvms can become non-NULL:
}
}
}
// Stay out of the way of real safepoints:
if (_pending_jvms != NULL &&
_pending_offset < pc_offset) {
}
}
void flush_at_end() {
if (_pending_jvms != NULL) {
}
}
};
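// Illustrative, self-contained analogue (hypothetical names; heavily
// simplified): the emitter above coalesces runs of instructions that share a
// JVMState and flushes the pending record before any pc that must stand on
// its own, such as a real safepoint.
struct PendingNonSafepointSketch {
  const void* state;     // stands in for the pending JVMState*
  int         first_pc;  // pc offset where that state first became current
  bool        pending;
  PendingNonSafepointSketch() : state(0), first_pc(0), pending(false) {}
  void observe(const void* s, int pc) {
    if (pending && state == s) return;     // repeated JVMS: stretch the pending record
    if (pending) emit();                   // state changed: flush the old record
    state = s; first_pc = pc; pending = true;
  }
  void before_safepoint(int pc) {
    if (pending && first_pc < pc) emit();  // stay out of the way of real safepoints
  }
  void flush_at_end() { if (pending) emit(); }
  void emit() { pending = false; /* a real emitter records debug info here */ }
};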
// Clear it now:
// Visit scopes from oldest to youngest.
}
// Mark the end of the scope set.
}
// helper for fill_buffer bailout logic
// Do not turn off compilation if a single giant method has
// blown the code cache size.
C->record_failure("excessive request to CodeCache");
} else {
// Let CompileBroker disable further compilations.
C->record_failure("CodeCache is full");
}
}
//------------------------------init_buffer------------------------------------
// Set the initially allocated size
// The extra spacing after the code is necessary on some platforms.
// Sometimes we need to patch in a jump after the last instruction,
// if the nmethod has been deoptimized. (See 4932387, 4894843.)
// Compute the byte offset where we can store the deopt pc.
if (fixed_slots() != 0) {
}
// Compute prolog code size
_method_size = 0;
#ifdef IA64
if (save_argument_registers()) {
// 4815101: this is a stub with implicit and unknown precision fp args.
// The usual spill mechanism can only generate stfd's in this case, which
// doesn't work if the fp reg to spill contains a single-precision denorm.
// Instead, we hack around the normal spill mechanism using stfspill's and
// ldffill's in the MachProlog and MachEpilog emit methods. We allocate
// space here for the fp arg regs (f8-f15) we're going to thusly spill.
//
// If we ever implement 16-byte 'registers' == stack slots, we can
}
#endif
if (has_mach_constant_base_node()) {
// Fill the constant table.
// Note: This must happen before shorten_branches.
// If the node is a MachConstantNode evaluate the constant
// value section.
if (n->is_MachConstant()) {
machcon->eval_constant(C);
}
}
}
// Calculate the offsets of the constants and the size of the
// constant table (including the padding to the next section).
}
// Initialize the space for the BufferBlob used to find and verify
// instruction size in MachNode::emit_size()
// Pre-compute the length of blocks and replace
// long branches with short if machine supports it.
// nmethod and CodeBuffer count stubs & constants as part of method's code.
if (StressCodeBuffers)
code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion
int total_req =
code_req +
pad_req +
stub_req +
deopt_handler_req; // deopt handler
if (has_method_handle_invokes())
// Have we run out of code space?
turn_off_compiler(this);
return NULL;
}
// Configure the code buffer.
// fill in the nop array for bundling computations
return cb;
}
//------------------------------fill_buffer------------------------------------
// blk_starts[] contains offsets calculated during short branches processing,
// offsets should not be increased during following steps.
// Compute the size of first NumberOfLoopInstrToAlign instructions at head
// of a loop. It is used to determine the padding for loop alignment.
// Create oopmap set.
_oop_map_set = new OopMapSet();
// !!!!! This preserves old handling of oopmaps for now
// Count and start of implicit null check instructions
// Count and start of calls
int previous_offset = 0;
int current_offset = 0;
#ifdef ASSERT
#endif
// Create an array of unused labels, one for each basic block, if printing is enabled
#ifndef PRODUCT
if (print_assembly())
#endif
// Emit the constant table.
if (has_mach_constant_base_node()) {
}
// Create an array of labels, one for each basic block
blk_labels[i].init();
}
// ------------------
// Now fill in the code buffer
// If this block needs to start aligned (i.e., can be reached other
// than by falling-thru from the previous block), then force the
// start of a new bundle.
cb->flush_bundle(true);
#ifdef ASSERT
if (!b->is_connector()) {
}
jmp_target[i] = 0;
jmp_offset[i] = 0;
jmp_size[i] = 0;
jmp_rule[i] = 0;
#endif
// Define the label at the beginning of the basic block
// Emit block normally, except for last instruction.
// Emit means "dump code bits into code buffer".
// Get the node
// See if delay slots are supported
if (valid_bundle_info(n) &&
node_bundling(n)->used_in_unconditional_delay()) {
delay_slot = n;
continue;
}
// If this starts a new instruction group, then flush the current one
// (but allow split bundles)
cb->flush_bundle(false);
// The following logic is duplicated in the code ifdeffed for
// ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It
// should be factored out. Or maybe dispersed to the nodes?
bool is_mcall = false;
if (n->is_Mach()) {
is_mcall = n->is_MachCall();
// If this requires all previous instructions be flushed, then do so
cb->flush_bundle(true);
}
// A padding may be needed again since a previous instruction
// could be moved to delay slot.
// align the instruction if necessary
// Make sure safepoint node for polling is distinct from a call's
// return by adding a nop if needed.
}
// Avoid back-to-back placement of some instructions.
}
if(padding > 0) {
last_inst++;
cb->flush_bundle(true);
}
// Remember the start of the last call in a basic block
if (is_mcall) {
// This destination address is NOT PC-relative
// Save the return address
if (mcall->is_MachCallLeaf()) {
is_mcall = false;
is_sfn = false;
}
}
// sfn will be valid whenever mcall is valid now because of inheritance
// Handle special safepoint nodes for synchronization
if (!is_mcall) {
// !!!!! Stubs only need an oopmap right now, so bail out
// Write the oopmap directly to the code blob??!!
# ifdef ENABLE_ZAP_DEAD_LOCALS
# endif
continue;
}
} // End synchronization
} // End if safepoint
// If this is a null check, then add the start of the previous instruction to the list
else if( mach->is_MachNullCheck() ) {
}
// If this is a branch, then fill in the label with the target BB's label
else if (mach->is_MachBranch()) {
// This requires the TRUE branch target be in succs[0]
// Try to replace long branch if delay slot is not used,
// it is mostly for back branches since forward branch's
// distance is not updated yet.
if (block_num >= i) {
// Current and following block's offset are not
// finalized yet, adjust distance by the difference
// between calculated and final offsets of current block.
}
// In the following code a nop could be inserted before
// the branch which will increase the backward distance.
if (needs_padding && offset <= 0)
// We've got a winner. Replace this branch.
// Update the jmp_size.
// Insert padding between avoid_back_to_back branches.
last_inst++;
cb->flush_bundle(true);
}
#ifdef ASSERT
jmp_target[i] = block_num;
#endif
n = replacement;
mach = replacement;
}
}
for (uint h = 0; h < b->_num_succs; h++) {
}
}
}
}
#ifdef ASSERT
// Check that oop-store precedes the card-mark
int count = 0;
count++;
}
// Note: This test can provide a false failure if other precedence
// edges have been added to the storeCMNode.
}
}
#endif
else if (!n->is_Proj()) {
// Remember the beginning of the previous instruction, in case
// it's followed by a flag-kill and a null-check. Happens on
// Intel all the time, with add-to-memory kind of opcodes.
}
}
// Verify that there is sufficient space remaining
turn_off_compiler(this);
return;
}
// Save the offset for the listing
#ifndef PRODUCT
#endif
// "Normal" instruction case
#ifdef ASSERT
n->dump();
assert(false, "wrong size of mach node");
}
#endif
// mcall is last "call" that can be a safepoint
// record it so we can see if a poll will directly follow it
// in which case we'll need a pad to make the PcDesc sites unique
// see 5010568. This can be slightly inaccurate but conservative
// in the case that return address is not actually at current_offset.
// This is a small price to pay.
if (is_mcall) {
}
// Avoid back-to-back placement of some instructions.
}
// See if this instruction has a delay slot
// Back up 1 instruction
// Save the offset for the listing
#ifndef PRODUCT
#endif
// Support a SafePoint in the delay slot
if (delay_slot->is_MachSafePoint()) {
// !!!!! Stubs only need an oopmap right now, so bail out
// Write the oopmap directly to the code blob??!!
# ifdef ENABLE_ZAP_DEAD_LOCALS
# endif
delay_slot = NULL;
continue;
}
// Generate an OopMap entry
}
// Insert the delay slot instruction
// Don't reuse it
delay_slot = NULL;
}
} // End for all instructions in block
// If the next block is the top of a loop, pad this block out to align
// the loop top a little. Helps prevent pipe stalls at loop back branches.
if (i < nblocks-1) {
if( padding > 0 ) {
}
}
// Verify that the distances generated earlier for forward
// short branches are still valid.
guarantee((int)(blk_starts[i+1] - blk_starts[i]) >= (current_offset - blk_offset), "shouldn't increase block size");
// Save new block start offset
blk_starts[i] = blk_offset;
} // End of for all blocks
// Offset too large?
if (failing()) return;
// Define a pseudo-label at the end of the code
// Compute the size of the first block
#ifdef ASSERT
if (jmp_target[i] != 0) {
tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]);
assert(false, "Displacement too large for short jmp");
}
}
}
#endif
// ------------------
#ifndef PRODUCT
// Information on the size of the method, without the extraneous code
#endif
// ------------------
// Fill in exception table entries.
// Only java methods have exception handlers and deopt handlers
if (_method) {
// Emit the exception handler code.
// Emit the deopt handler code.
// Emit the MethodHandle deopt handler code (if required).
if (has_method_handle_invokes()) {
// We can use the same code as for the normal deopt handler, we
// just need a different entry point address.
}
}
// One last check for failed CodeBuffer::expand:
turn_off_compiler(this);
return;
}
#ifndef PRODUCT
// Dump the assembly code, including basic-block numbers
if (print_assembly()) {
// This output goes directly to the tty, not the compiler log.
// To enable tools to match it up with the compilation activity,
// be sure to tag this tty output with the compile ID.
is_osr_compilation() ? " compile_kind='osr'" :
"");
}
print_codes();
}
}
}
}
#endif
}
void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels) {
int j;
// Find the branch; ignore trailing NOPs.
n = b->_nodes[j];
break;
}
// If we didn't find anything, continue
if( j < 0 ) continue;
// Compute ExceptionHandlerTable subtable entry and add it
// (skip empty blocks)
if( n->is_Catch() ) {
// Get the offset of the return from the call
#ifdef ASSERT
while( b->_nodes[--j]->is_MachProj() ) ;
#endif
// last instruction is a CatchNode, find its CatchProjNodes
// allocate space
// iterate through all successors
for (int j = 0; j < nof_succs; j++) {
bool found_p = false;
found_p = true;
// add the corresponding handler bci & pco information
// p leads to an exception handler (and is not fall through)
// no duplicates, please
}
}
}
}
// Note: Due to empty block removal, one block may have
// several CatchProj inputs, from the same Catch.
}
// Set the offset of the return from the call
continue;
}
// Handle implicit null exception table updates
if( n->is_MachNullCheck() ) {
continue;
}
} // End of for all blocks fill in exception table entries
}
// Static Variables
#ifndef PRODUCT
#endif
// Initializer for class Scheduling
#ifndef PRODUCT
, _branches(0)
#endif
{
// Create a MachNopNode
// Now that the nops are in the array, save the count
// (but allow entries for the nops)
// This one is persistent within the Compile class
// Allocate space for fixed-size arrays
// Clear the arrays
// Clear the bundling information
sizeof(Pipeline_Use::elaborated_elements));
// Get the last node
}
#ifndef PRODUCT
// Scheduling destructor
Scheduling::~Scheduling() {
}
#endif
// Step ahead "i" cycles
// Update the bundle record, but leave the flags information alone
if (_bundle_instr_count > 0) {
}
// Update the state information
_bundle_instr_count = 0;
_bundle_cycle_number += i;
_bundle_use.step(i);
}
// Update the bundle record
if (_bundle_instr_count > 0) {
_bundle_cycle_number += 1;
}
// Clear the bundling information
_bundle_instr_count = 0;
_bundle_use.reset();
sizeof(Pipeline_Use::elaborated_elements));
}
//------------------------------ScheduleAndBundle------------------------------
// Perform instruction scheduling and bundling over the sequence of
// instructions in backwards order.
// Don't optimize this if it isn't a method
if (!_method)
return;
// Don't optimize this if scheduling is disabled
if (!do_scheduling())
return;
// Scheduling code works only with pairs (8 bytes) maximum.
if (max_vector_size() > 8)
return;
// Create a data structure for all the scheduling information
// Walk backwards over each basic block, computing the needed alignment
// Walk over all the basic blocks
}
//------------------------------ComputeLocalLatenciesForward-------------------
// Compute the latency of all the instructions. This is fairly simple,
// because we already have a legal ordering. Walk over the instructions
// from first to last, and compute the latency of the instruction based
// on the latency of the preceding instruction(s).
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
// Walk over all the schedulable instructions
// This is a kludge, forcing all latency calculations to start at 1.
// Used to allow latency 0 to force an instruction to the beginning
// of the bb
// Walk over all the inputs
if (!def)
continue;
if (latency < l)
latency = l;
}
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) {
}
#endif
}
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
} // end ComputeLocalLatenciesForward
// See if this node fits into the present instruction bundle
// If this is the unconditional delay instruction, then it fits
if (n == _unconditional_delay_slot) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
return (true);
}
// If the node cannot be scheduled this cycle, skip it
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
return (false);
}
instruction_count = 0;
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
return (false);
}
// Don't allow non-machine nodes to be handled this way
if (!n->is_Mach() && instruction_count == 0)
return (false);
// See if there is any overlap
if (delay > 0) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
return false;
}
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
return true;
}
if (siz == 0) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
return (NULL);
}
// Fast path, if only 1 instruction in the bundle
if (siz == 1) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) {
_available[0]->dump();
}
#endif
return (_available[0]);
}
// Don't bother, if the bundle is already full
Node *n = _available[i];
// Skip projections, we'll handle them another way
if (n->is_Proj())
continue;
// This presupposes that instructions are inserted into the
// available list in a legality order; i.e. instructions that
// must be inserted first are at the head of the list
if (NodeFitsInBundle(n)) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) {
n->dump();
}
#endif
return (n);
}
}
}
// Nothing fits in this bundle, choose the highest priority
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) {
_available[0]->dump();
}
#endif
return _available[0];
}
//------------------------------AddNodeToAvailableList-------------------------
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) {
n->dump();
}
#endif
// Insert in latency order (insertion sort)
uint i;
for ( i=0; i < _available.size(); i++ )
break;
// Special Check for compares following branches
// Recalculate position, moving to front of same latency
for ( i=0 ; i < _available.size(); i++ )
break;
}
}
// Insert the node in the available list
_available.insert(i, n);
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
}
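// Illustrative sketch (hypothetical, simplified): insertion-sort placement of
// a node into a worklist kept in decreasing-latency order, so that the most
// latency-critical candidate sits at index 0 and is chosen first. The caller
// is assumed to provide room for one more entry.
static void insert_by_latency_sketch(int* worklist, int& size, const int* latency_of, int n) {
  int i = 0;
  while (i < size && latency_of[worklist[i]] >= latency_of[n]) i++;  // keep higher latencies first
  for (int k = size; k > i; k--) worklist[k] = worklist[k - 1];      // shift the tail right
  worklist[i] = n;
  size++;
}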
//------------------------------DecrementUseCounts-----------------------------
if (!def) continue;
continue;
// Compute the latency
// If this does not have uses then schedule it
}
}
//------------------------------AddNodeToBundle--------------------------------
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) {
n->dump();
}
#endif
// Remove this from the available list
uint i;
for (i = 0; i < _available.size(); i++)
if (_available[i] == n)
break;
_available.remove(i);
// See if this fits in the current bundle
// Check for instructions to be placed in the delay slot. We
// do this before we actually schedule the current instruction,
// because the delay slot follows the current instruction.
if (Pipeline::_branch_has_delay_slot &&
node_pipeline->hasBranchDelay() &&
// Conditional branches can support an instruction that
// is unconditionally executed and not dependent on the
// branch, OR a conditionally executed instruction if
// the branch is taken. In practice, this means that
// the first instruction at the branch target is
// copied to the delay slot, and the branch goes to
// the instruction after that at the branch target
if ( n->is_MachBranch() ) {
#ifndef PRODUCT
_branches++;
#endif
// At least 1 instruction is on the available list
// that is not dependent on the branch
Node *d = _available[i];
// Don't allow safepoints in the branch shadow, that will
// cause a number of difficulties
!avail_pipeline->hasMultipleBundles() &&
!avail_pipeline->hasBranchDelay() &&
Pipeline::instr_has_unit_size() &&
NodeFitsInBundle(d) &&
!node_bundling(d)->used_in_delay()) {
if (d->is_Mach() && !d->is_MachSafePoint()) {
// A node that fits in the delay slot was found, so we need to
// set the appropriate bits in the bundle pipeline information so
// that it correctly indicates resource usage. Later, when we
// attempt to add this instruction to the bundle, we will skip
// setting the resource usage.
_next_node = d;
#ifndef PRODUCT
#endif
break;
}
}
}
}
// No delay slot, add a nop to the usage
if (!_unconditional_delay_slot) {
// See if adding an instruction in the delay slot will overflow
// the bundle.
if (!NodeFitsInBundle(_nop)) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
step(1);
}
_next_node = _nop;
}
// See if the instruction in the delay slot requires a
// step of the bundles
if (!NodeFitsInBundle(n)) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
// Update the state information
_bundle_instr_count = 0;
_bundle_cycle_number += 1;
}
}
// Get the number of instructions
instruction_count = 0;
// Compute the latency information
if (relative_latency < 0)
relative_latency = 0;
// Does not fit in this bundle, start a new one
if (delay > 0) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
}
}
// If this was placed in the delay slot, ignore it
if (n != _unconditional_delay_slot) {
if (delay == 0) {
if (node_pipeline->hasMultipleBundles()) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
step(1);
}
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
step(1);
}
}
// Set the node's latency
// Now merge the functional unit information
// Increment the number of instructions in this bundle
// Remember this node for later
if (n->is_Mach())
_next_node = n;
}
// It's possible to have a BoxLock in the graph and in the _bbs mapping but
// not in the bb->_nodes array. This happens for debug-info-only BoxLocks.
// 'Schedule' them (basically ignore in the schedule) but do not insert them
// into the block. All other scheduled nodes get put in the schedule here.
// not an unallocated boxlock
// Push any trailing projections
}
}
// Put the instruction in the schedule list
_scheduled.push(n);
}
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
// Walk all the definitions, decrementing use counts, and
// if a definition has a 0 use count, place it in the available list.
DecrementUseCounts(n,bb);
}
//------------------------------ComputeUseCount--------------------------------
// This method sets the use count within a basic block. We will ignore all
// uses outside the current basic block. As we are doing a backwards walk,
// any node we reach that has a use count of 0 may be scheduled. This also
// avoids the problem of cyclic references from phi nodes, as long as phi
// nodes are at the front of the basic block. This method also initializes
// the available list to the set of instructions that have no uses within this
// basic block.
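// Illustrative sketch (standalone, simplified; not the real data structures):
// per-block use counts for a backwards scheduling walk. Only uses inside the
// block are counted; a node whose count is zero has no remaining in-block
// consumers and is immediately "available".
struct UseEdgeSketch { int def; int use; };  // "use" consumes the value "def" produces
static void compute_use_count_sketch(int nnodes, const UseEdgeSketch* edges, int nedges,
                                     int* use_cnt, int* available, int& navail) {
  for (int i = 0; i < nnodes; i++) use_cnt[i] = 0;
  for (int e = 0; e < nedges; e++) use_cnt[edges[e].def]++;   // in-block uses only
  navail = 0;
  for (int i = 0; i < nnodes; i++) {
    if (use_cnt[i] == 0) available[navail++] = i;             // ready to schedule first
  }
}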
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
// Clear the list of available and scheduled instructions, just in case
_available.clear();
_scheduled.clear();
// No delay slot specified
#ifdef ASSERT
#endif
// Force the _uses count to never go to zero for unschedulable pieces
// of the block
// Iterate backwards over the instructions in the block. Don't count the
// branch projections at end or the block header instructions.
if( n->is_Proj() ) continue; // Projections handled another way
// Account for all uses
if (!inp) continue;
}
}
// If this instruction has a 0 use count, then it is available
}
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) {
n->dump();
}
#endif
}
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
}
// This routine performs scheduling on each basic block in reverse order,
// using instruction latencies and taking into account function unit
// availability.
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
// Walk over all the basic blocks in reverse order
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) {
}
#endif
// On the head node, skip processing
continue;
// Skip empty, connector blocks
if (bb->is_connector())
continue;
// If the following block is not the sole successor of
// this one, then reset the pipeline information
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) {
}
#endif
}
// Leave untouched the starting instruction, any Phis, a CreateEx node
// or Top. bb->_nodes[_bb_start] is the first schedulable instruction.
// Things not matched, like PhiNodes and ProjNodes, don't get scheduled.
// Also, MachIdealNodes do not get scheduled
if( !n->is_Mach() ) continue; // Skip non-machine nodes
!n->is_SpillCopy() ) // Breakpoints, Prolog, etc
continue;
break; // Funny loop structure to be sure...
}
// Compute last "interesting" instruction in block - last instruction we
// might schedule. _bb_end points just after last schedulable inst. We
// normally schedule conditional branches (despite them being forced last
// in the block), because they have delay slots we can fill. Calls all
// have their delay slots filled in the template expansions, so we don't
// bother scheduling them.
// Ignore trailing NOPs.
}
// Exclude unreachable path case when Halt node is in a separate block.
// There must be a prior call. Skip it.
}
} else if( last->is_MachNullCheck() ) {
// Backup so the last null-checked memory instruction is
// outside the schedulable range. Skip over the nullcheck,
// projection, and the memory nodes.
do {
_bb_end--;
} else {
// Set _bb_end to point after last schedulable inst.
_bb_end++;
}
// Compute the register antidependencies for the basic block
// Compute intra-bb latencies for the nodes
// Compute the usage within the block, and set the list of all nodes
// in the block that have no uses within the block.
// Schedule the remaining instructions in the block
while ( _available.size() > 0 ) {
Node *n = ChooseNodeToBundle();
AddNodeToBundle(n,bb);
}
#ifdef ASSERT
uint m;
if( _scheduled[m] == n )
break;
}
#endif
// Now copy the instructions (in reverse order) back to the block
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) {
if( valid_bundle_info(n) ) {
}
n->dump();
}
}
}
#endif
#ifdef ASSERT
#endif
}
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
#endif
// Record final node-bundling array location
} // end DoScheduling
//------------------------------verify_good_schedule---------------------------
// Verify that no live-range used in the block is killed in the block by a
// wrong DEF. This doesn't verify live-ranges that span blocks.
// Check for edge existence. Used to avoid adding redundant precedence edges.
return true;
return false;
}
#ifdef ASSERT
//------------------------------verify_do_def----------------------------------
// Check for bad kills
n->dump();
}
}
}
//------------------------------verify_good_schedule---------------------------
// Zap to something reasonable for the verify code
// Walk over the block backwards. Check to make sure each DEF doesn't
// kill a live value (other than the one it's supposed to). Add each
// USE to the live set.
// Fat-proj kills a slew of registers
while( rm.is_NotEmpty() ) {
}
// Get DEF'd registers the normal way
}
// Now make all USEs live
}
}
}
}
// Zap to something reasonable for the Antidependence code
}
#endif
// Conditionally add precedence edges. Avoid putting edges on Projs.
assert( from->req() == 1 && (from->len() == 1 || from->in(1)==0), "no precedence edges on projections" );
}
}
//------------------------------anti_do_def------------------------------------
return;
is_def ) { // Check for a true def (not a kill)
return;
}
// After some number of kills there _may_ be a later def
// Finding a kill requires a real pinch-point.
// Check for not already having a pinch-point.
// Pinch points are Op_Node's.
if ( _pinch_free_list.size() > 0) {
} else {
}
return;
}
//_regalloc->set_bad(pinch->_idx); // Already initialized this way.
if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill
}
} else { // Else have valid pinch point
}
// Add output-dependence edge from later def to kill
if( later_def ) // If there is some original def
// See if current kill is also a use, and so is forced to be the pinch-point.
return;
}
}
}
// Add edge from kill to pinch-point
}
//------------------------------anti_do_use------------------------------------
return;
// Use has to be block-local as well
// Insert the pinch-point in the block just after the last use
_bb_end++; // Increase size scheduled region in block
}
}
}
//------------------------------ComputeRegisterAntidependences-----------------
// We insert antidependences between the reads and following write of
// allocated registers to prevent illegal code motion. Hopefully, the
// number of added references should be fairly small, especially as we
// are only adding references within the current basic block.
#ifdef ASSERT
verify_good_schedule(b,"before block local scheduling");
#endif
// A valid schedule, for each register independently, is an endless cycle
// of: a def, then some uses (connected to the def by true dependencies),
// then some kills (defs with no uses), finally the cycle repeats with a new
// def. The uses are allowed to float relative to each other, as are the
// kills. No use is allowed to slide past a kill (or def). This requires
// antidependencies between all uses of a single def and all kills that
// follow, up to the next def. More edges are redundant, because later defs
// & kills are already serialized with true or antidependencies. To keep
// the edge count down, we add a 'pinch point' node if there's more than
// one use or more than one kill for a register.
// We add dependencies in one bottom-up pass. Uses and kills meet at the
// "pinch point", a new Node that's in the graph but not in the block.
// We put the pinch point in _reg_node. If there's already a pinch point
// for a use's register, we
// put an edge from the pinch point to the USE.
// To be expedient, the _reg_node array is pre-allocated for the whole
// compilation. _reg_node is lazily initialized; it either contains a NULL,
// or a valid def/kill/pinch-point, or a leftover node from some prior
// block. Leftover node from some prior block is treated like a NULL (no
// prior def, so no anti-dependence needed). Valid def is distinguished by
// it being in the current block.
bool fat_proj_seen = false;
// Fat-proj kills a slew of registers
// This can add edges to 'n' and obscure whether or not it was a def,
// hence the is_def flag.
fat_proj_seen = true;
while( rm.is_NotEmpty() ) {
}
} else {
// Get DEF'd registers the normal way
}
// Kill projections on a branch should appear to occur on the
// branch, not afterwards, so grab the masks from the projections
// and process them.
while( rm.is_NotEmpty() ) {
anti_do_def( b, n, kill, false );
}
}
}
}
// Check each register used by this instruction for a later def or kill
// that must occur afterward and requires an anti-dependence edge.
if( def ) {
}
}
// Do not allow defs of new derived values to float above GC
// points unless the base is definitely available at the GC point.
// Add precedence edge from following safepoint to use of derived pointer
if( last_safept_node != end_node &&
m != last_safept_node) {
if( t->isa_oop_ptr() &&
last_safept_node->add_prec( m );
break;
}
}
}
if( n->jvms() ) { // Precedence edge from derived to safept
// Check if last_safept_node was moved by pinch-point insertion in anti_do_use()
}
for( uint j=last_safept; j > i; j-- ) {
}
last_safept = i;
last_safept_node = m;
}
}
if (fat_proj_seen) {
// Garbage collect pinch nodes that were not consumed.
// They are usually created by a fat kill MachProj for a call.
}
}
//------------------------------garbage_collect_pinch_nodes-------------------------------
// Garbage collect pinch nodes for reuse by other blocks.
//
// The block scheduler's insertion of anti-dependence
// edges creates many pinch nodes when the block contains
// 2 or more Calls. A pinch node is used to prevent a
// combinatorial explosion of edges. If a set of kills for a
// register is anti-dependent on a set of uses (or defs), rather
// than adding an edge in the graph between each pair of kill
// and use (or def), a pinch is inserted between them:
//
// use1 use2 use3
// \ | /
// \ | /
// pinch
// / | \
// / | \
// kill1 kill2 kill3
//
// One pinch node is created per register killed when
// the second call is encountered during a backwards pass
// over the block. Most of these pinch nodes are never
// wired into the graph because the register is never
// used or def'ed in the block.
//
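// Worked example (not from the source): for one register with U uses and K
// kills, direct anti-dependence edges would number U * K, while routing them
// through a single pinch node needs only U + K edges. With U = K = 8 that is
// 64 edges versus 16, which is why the pinch nodes above are worth recycling.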
#ifndef PRODUCT
#endif
int trace_cnt = 0;
// no precedence input edges
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) {
trace_cnt++;
if (trace_cnt > 40) {
trace_cnt = 0;
}
}
#endif
}
}
#ifndef PRODUCT
#endif
}
// Clean up a pinch node for reuse.
uses_found++;
}
}
i -= uses_found; // we deleted 1 or more copies of this edge
}
// May have a later_def entry
}
//------------------------------print_statistics-------------------------------
#ifndef PRODUCT
}
// Print Scheduling Statistics
// Print the size added by nops for bundling
if (_total_method_size > 0)
// Print the number of branch shadows filled
if (Pipeline::_branch_has_delay_slot) {
if (_total_branches > 0)
}
total_instructions += bundle_count * i;
}
if (total_bundles > 0)
((double)total_instructions) / ((double)total_bundles));
}
#endif