// macro.cpp, revision 2674
/*
 * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

// Replace any references to "oldref" in inputs to "use" with "newref".
// Returns the number of replacements made.

// Copy debug information and adjust JVMState information.

// Clone old SafePointScalarObjectNodes, adjusting their field contents.

// Fast path not taken, i.e. slow path.

//--------------------copy_predefined_input_for_runtime_call--------------------
// Set fixed predefined input arguments.

//------------------------------make_slow_call---------------------------------
// A slow path call has no side effects and uses few values.

// For Control (fallthrough) and I_O (catch_all_index) we have
// CatchProj -> Catch -> Proj.
assert(false, "unexpected projection from allocation node.");
// Eliminate a card mark sequence. p2x is a ConvP2XNode.
// The load is checking whether the card has been written, so
// replace it with zero to fold the test.

// There could be only one user, a URShift node, in the Object.clone()
// intrinsic, but there the new allocation is passed to the arraycopy stub
// and cannot be scalar replaced, so we don't check for that case.

// Remove the G1 post barrier.
// Search for the CastP2X->Xor->URShift->Cmp path, which checks whether the
// store was done to a region different from the value's region, and replace
// the Cmp with #0 (false) to collapse the G1 post barrier.
"missing region check in G1 post barrier");
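// --------------------------------------------------------------------------
// Illustrative sketch (standalone, not VM code): what the card-mark sequence
// being eliminated does. A store of an oop dirties the card covering the
// field's address; if the stored object provably never escapes, the mark
// (and, for G1, the region-check that guards it) can be folded away. The
// card shift and table base below are hypothetical stand-ins.
#include <cstdint>

static const int kCardShift = 9;              // 512-byte cards (stand-in)
static uint8_t*  g_card_table_base;           // byte-map base (stand-in)

inline void post_barrier(void* field_addr) {
  // CastP2X: reinterpret the pointer as an integer, shift to a card index,
  // and dirty the card -- this is the sequence the optimizer removes.
  uintptr_t addr = reinterpret_cast<uintptr_t>(field_addr);
  g_card_table_base[addr >> kCardShift] = 0;  // 0 == dirty card
}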
// Remove the G1 pre barrier.
// Search for the "if (marking != 0)" check and set it to "false".
// There is no G1 pre barrier if the previously stored value is NULL
// (for example, after initialization).

// Now the CastP2X can be removed, since it is used only on a dead path,
// which is currently still alive until igvn optimizes it.

// Search for a memory operation for the specified memory slice.
return mem;
// Hit one of our sentinels.

// We can safely skip over safepoints, calls, locks and membars because we
// already know that the object is safe to eliminate.

assert(false, "unexpected projection");
// Array element references have the same alias_idx
// but different offsets and different instance_ids.

// Cannot bypass initialization of the instance.

// We are looking for the stored value; return the Initialize node
// or the memory edge from the Allocate node.

// Otherwise skip it (the call updated the 'mem' value).

assert(false, "Object is not scalar replaceable if a LoadStore node accesses its field");
// Given a Memory Phi, compute a value Phi containing the values from stores
// on the input paths.
// Note: this function is recursive; its depth is limited by the "level" argument.
// Returns the computed Phi, or NULL if it cannot compute it.

// Check if an appropriate value phi already exists.

// Check if an appropriate new value phi already exists.

return NULL; // Give up: phi tree too deep.

// Create a new Phi for the value.

// Hit a sentinel; return the appropriate 0 value.

return NULL; // Can't find a value on this path.

assert(false, "Object is not scalar replaceable if a LoadStore node accesses its field");
assert(false, "unknown node on this path");

return NULL; // Unknown node on this path.

// Search for the last value stored into the object's field.

return NULL; // Found a loop; give up.

done = true; // Hit a sentinel; return the appropriate 0 value.

done = true; // Something went wrong.

// Try to find a phi's unique input.

assert(false, "unexpected node");
// Hit a sentinel; return the appropriate 0 value.

// Attempt to produce a Phi reflecting the values on the input paths of the Phi.

// Check the possibility of scalar replacement.
// Scan the uses of the allocation to check for anything that would
// prevent us from eliminating it.

// All users were eliminated.

// The object is passed as an argument.

// Do scalar replacement.
if (res != NULL) { // Could be NULL when there are no users.

// Find the fields of the class which will be needed for safepoint debug information.

// Find the array's elements which will be needed for safepoint debug information.

// Process the safepoint uses.

// Scan the object's fields, adding an input to the safepoint for each field.
for (int j = 0; j < nfields; j++) {
// The next code is taken from Parse::do_get_xxx().

// This can happen if the constant oop is non-perm.

// Do not "join" in the previous type; it doesn't add value,
// and may yield a vacuous result if the field is of interface type.

// We weren't able to find a value for this field, so
// give up on eliminating this allocation.

// Remove any extra entries we added to the safepoint.
for (int k = 0; k < j; k++) {

// Roll back processed safepoints.

// Remove any extra entries we added to the safepoint.
for (int k = 0; k < nfields; k++) {
// Now make a pass over the debug information replacing any references
// to SafePointScalarObjectNode with the allocated object.

tty->print("=== At SafePoint node %d can't find value of Field: ",
} else {
  // Array's element.
  tty->print("=== At SafePoint node %d can't find value of array element [%d]",
}
tty->print(", which prevents elimination of: ");
// Enable "DecodeN(EncodeP(Allocate)) --> Allocate" transformation // to be able scalar replace the allocation. // Now make a pass over the debug information replacing any references // to the allocated object with "sobj" // Process users of eliminated allocation. // Verify that there is no dependent MemBarVolatile nodes, // they should be removed during IGVN, see MemBarNode::Ideal(). "MemBarVolatile should be eliminated for non-escaping object");
assert(res->outcnt() == 0, "all uses of allocated objects must be deleted");
// Process other users of the allocation's projections.

// Eliminate the Initialize node.

// Raw memory addresses are used only by the initialization.

assert(false, "only Initialize or AddP expected");
log->head("eliminate_allocation type='%d'",
//---------------------------set_eden_pointers-------------------------
if (UseTLAB) {                // Private allocation: load from TLS.
} else {                      // Shared allocation: load from globals.

//=============================================================================
// Allocation attempts to be fast in the case of frequent small objects.
// It breaks down like this:
//
// 1) Size in doublewords is computed. This is a constant for objects and
// variable for most arrays. Doubleword units are used to avoid size
// overflow of huge doubleword arrays. We need doublewords in the end for
// rounding.
//
// 2) Size is checked for being 'too large'. Too-large allocations will go
// the slow path into the VM. The slow path can throw any required
// exceptions, and does all the special checks for very large arrays. The
// size test can constant-fold away for objects. For objects with
// finalizers it constant-folds the other way: you always go slow with
// finalizers.
//
// 3) If NOT using TLABs, this is the contended loop-back point.
// Load-Locked the heap top. If using TLABs, normal-load the heap top.
//
// 4) Check that heap top + size*8 < max. If we fail, go the slow route.
// NOTE: "top+size*8" cannot wrap the 4Gig line! Here's why: for largish
// "size*8" we always enter the VM, where "largish" is a constant picked small
// enough that there's always space between the eden max and 4Gig (old space is
// there so it's quite large) and large enough that the cost of entering the VM
// is dwarfed by the cost to initialize the space.
//
// 5) If NOT using TLABs, Store-Conditional the adjusted heap top back
// down. If contended, repeat at step 3. If using TLABs, normal-store the
// adjusted heap top back down; there is no contention.
//
// 6) If !ZeroTLAB then Bulk-clear the object/array. Fill in the klass & mark
// fields.
//
// 7) Merge with the slow path; cast the raw memory pointer to the correct
// oop type.
//=============================================================================
// FastAllocateSizeLimit value is in DOUBLEWORDS.
// Allocations bigger than this always go the slow route.
// This value must be small enough that allocation attempts that need to
// trigger exceptions go the slow route. Also, it must be small enough so
// that heap_top + size_in_bytes does not wrap around the 4Gig limit.
//=============================================================================
//
// The allocator will coalesce int->oop copies away. See comment in
// coalesce.cpp about how this works. It depends critically on the exact
// code shape produced here, so if you are changing this code shape
// make sure the GC info for the heap-top is correct in and around the
// slow-path call.
//
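// --------------------------------------------------------------------------
// Illustrative sketch (standalone, not VM code): steps 2-5 of the fast path
// described above, reduced to plain C++. The globals, the size limit and
// slow_path_allocate are hypothetical stand-ins for eden and
// FastAllocateSizeLimit; with TLABs the top/end would be thread-local and
// the CAS would be a plain store. Steps 6-7 (clearing and header setup) and
// the Region/Phi merge of the two paths are not modeled.
#include <atomic>
#include <cstddef>
#include <cstdint>

static std::atomic<uintptr_t> g_heap_top;   // shared eden top
static uintptr_t              g_heap_end;   // eden end (watermark)
static const size_t kFastAllocateSizeLimit = 1u << 17;  // doublewords

extern void* slow_path_allocate(size_t size_in_bytes);  // enters the "VM"

void* fast_allocate(size_t size_in_doublewords) {
  // Step 2: too-large allocations always take the slow route, which also
  // keeps top + size from ever wrapping past the end of the address range.
  if (size_in_doublewords >= kFastAllocateSizeLimit) {
    return slow_path_allocate(size_in_doublewords * 8);
  }
  size_t size_in_bytes = size_in_doublewords * 8;
  for (;;) {
    // Step 3: load the current heap top (contended loop-back point).
    uintptr_t old_top = g_heap_top.load(std::memory_order_relaxed);
    uintptr_t new_top = old_top + size_in_bytes;
    // Step 4: check for needing a GC; compare against the heap end.
    if (new_top >= g_heap_end) {
      return slow_path_allocate(size_in_bytes);
    }
    // Step 5: store-conditional the adjusted top; on contention, retry.
    if (g_heap_top.compare_exchange_weak(old_top, new_top,
                                         std::memory_order_relaxed)) {
      return reinterpret_cast<void*>(old_top);
    }
  }
}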
Node* length,   // array length for an array allocation

// We need a Region and corresponding Phi's to merge the slow-path and
// fast-path results; they will not be used if "always_slow" is set.

// The initial slow comparison is a size check; the comparison
// we want to do is a BoolTest::gt.

// Force slow-path allocation.

// Generate the initial test if necessary.

// Now make the initial failure test. Usually a too-big test, but it
// might be a TRUE for finalizers or a fancy class check.

// Plug the failing-too-big test into the slow-path region.
} else { // No initial test; just fall into the next case.

// Generate the fast allocation code unless we know that the initial test
// will always go slow.

// The fast path modifies only raw memory.

// Load Eden::end. Loop invariant and hoisted.
//
// Note: We set the control input on "eden_end" and "old_eden_top" when using
// a TLAB to work around a bug where these values were being moved across
// a safepoint. These are not oops, so they cannot be included in the oop
// map, but they can be changed by a GC. The proper way to fix this would
// be to set the raw memory state when generating a SafepointNode. However,
// this will require extensive changes to the loop optimization in order to
// prevent a degradation of the optimization.
// See the comment in memnode.hpp, around line 227, in class LoadPNode.

// Allocate the Region and Phi nodes for the result.

// We need a Region for the loop-back contended case.

// Now handle the passing-too-big test. We fall into the contended
// loop-back merge point.

// Load(-locked) the heap top.
// See the note above concerning the control input when using a TLAB.

// Add to the heap top to get a new heap top.

// Check for needing a GC; compare against the heap end.

// Plug the failing-heap-space-need-gc test into the slow-path region.

// This completes all paths into the slow merge point.
} else { // No initial slow path needed!
  // Just fall from the need-GC path straight into the VM call.

// No need for a GC. Set up for the Store-Conditional.

// Grab regular I/O before the optional prefetch may change it.
// The slow path does no I/O, so just set it to the original I/O.

// Name successful fast-path variables.

// Store(-conditional) the modified eden top back down.
// StorePConditional produces flags for a test PLUS a modified raw memory state.

// If not using TLABs, check to see if there was contention;
// if there was, loop back and try again.

// The fast path succeeded with no contention!

// Bump the total allocated bytes for this thread.

// Get the base of the thread-local storage area.

// Plug the successful fast path into the result merge point.

// Generate the slow-path call.

// Copy debug information and adjust JVMState information, then replace
// the allocate node with the call.

// Identify the output projections from the allocate node and
// adjust any references to them.
// The control and io projections look like:
//
//        v---Proj(ctrl) <-----+   v---CatchProj(ctrl)
//  Allocate                   Catch
//        ^---Proj(io)    <-------+ ^---CatchProj(io)
//
// We are interested in the CatchProj nodes.

// An allocate node has separate memory projections for the uses on the
// control and i_o paths. Replace uses of the control memory projection with
// result_phi_rawmem (unless we are only generating a slow call).

// Now change uses of _memproj_catchall to use _memproj_fallthrough and
// delete _memproj_catchall, so we end up with a call that has only one
// memory projection.

// An allocate node has separate i_o projections for the uses on the control
// and i_o paths. Replace uses of the control i_o projection with
// result_phi_i_o (unless we are only generating a slow call).

// Now change uses of _ioproj_catchall to use _ioproj_fallthrough and
// delete _ioproj_catchall, so we end up with a call that has only one
// i_o projection.

// If we generated only a slow call, we are done.

// No uses of the allocation result.

// Plug the slow path into the result merge point.

// This completes all paths into the result merge point.

// Helper for PhaseMacroExpand::expand_allocate_common.
// Initializes the newly allocated storage.

// Store the klass & mark bits.

// For now, only enable fast locking for non-array types.

// Conservatively small header size:
if (k->is_array_klass())  // We know the exact header size in most cases:
// Clear the object body, if necessary.

// The init has somehow disappeared; be cautious and clear everything.
//
// This can happen if a node is allocated but an uncommon trap occurs
// immediately. In this case, the Initialize gets associated with the
// trap, and may be placed in a different (outer) loop, if the Allocate
// is in a loop. If (this is rare) the inner loop gets unrolled, then
// there can be two Allocates to one Initialize. The answer in all these
// edge cases is safety first. It is always safe to clear immediately
// within an Allocate, and then (maybe or maybe not) clear some more later.

// Try to win by zeroing only what the init does not store.
// We can also try to do some peephole optimizations,
// such as combining some adjacent subword stores.

// We have no more use for this link, since the AllocateNode goes away:
// (If we keep the link, it just confuses the register allocator,
// who thinks he sees a real use of the address by the membar.)

// Generate prefetch instructions for the next allocations.

// Generate a prefetch allocation with a watermark check.
// As an allocation hits the watermark, we will prefetch starting
// at a "distance" away from the watermark.

// I/O is used for Prefetch.

// Check against new_eden_top.

// True node: add the prefetch distance.

// Insert a prefetch for each allocation only on the fast path.

// Generate several prefetch instructions only for arrays.

// Insert a prefetch for each allocation only on the fast path.

// Generate several prefetch instructions only for arrays.

// Do not let it float too high, since if eden_top == eden_end,
// both might be null.
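// --------------------------------------------------------------------------
// Illustrative sketch (standalone, not VM code): prefetching past the
// allocation watermark, as described above. Each allocation prefetches a few
// cache lines starting some distance beyond the new top, so the memory is
// warm by the time later allocations reach it. The constants are
// hypothetical stand-ins for the AllocatePrefetch* flags, and
// __builtin_prefetch is the GCC/Clang builtin.
#include <cstdint>

static const int kPrefetchDistance = 256;  // bytes past the new eden top
static const int kPrefetchLines    = 3;    // cache lines to prefetch
static const int kLineSize         = 64;   // cache line size in bytes

inline void prefetch_after_allocation(uintptr_t new_eden_top) {
  uintptr_t p = new_eden_top + kPrefetchDistance;
  for (int i = 0; i < kPrefetchLines; i++) {
    __builtin_prefetch(reinterpret_cast<void*>(p), 1);  // 1 == write intent
    p += kLineSize;
  }
}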
if (i == 0) { // Set control for the first prefetch; the following ones chain from it.

//-----------------------mark_eliminated_locking_nodes-----------------------
// During EA, obj may point to several objects, but after a few ideal graph
// transformations (CCP) it may point to only one non-escaping object (while
// still going through a phi), and then the corresponding locks and unlocks
// will be marked for elimination. Later, obj could be replaced with a new
// node (a new phi) which does not have escape information. And later still,
// after some graph reshaping, other locks and unlocks (which were not marked
// for elimination before) are connected to this new obj (phi), yet they will
// still not be marked for elimination, since the new obj has no escape
// information.

// Mark all associated (same box and obj) lock and unlock nodes for
// elimination if some of them are marked already.

// Create a new "eliminated" BoxLock node and use it
// in the monitor debug info for the same object.
// Note: the BoxLock node is marked eliminated only here,
// and it is used to indicate that all associated lock
// and unlock nodes are marked for elimination.

// Replace the old box node with the new box for all users.

// Mark all associated locks and unlocks.

// Replace the old box in the monitor debug info.
} // if (u->is_SafePoint())
} // for (uint i = 0; i < oldbox->outcnt();)
} // if (!oldbox->is_eliminated())
} // if (!alock->is_coarsened())

// We have determined that this lock/unlock can be eliminated; we simply
// eliminate the node without expanding it.
//
// Note: the membars associated with the lock/unlock are currently not
// eliminated. This should be investigated as a future enhancement.

// Check that the new "eliminated" BoxLock node is created.

log->head("eliminate_lock lock='%d'",
// There are 2 projections from the lock. The lock node will
// be deleted when its last use is subsumed below.

// The memory projection from a lock/unlock is RawMem.

// The input to a Lock is merged memory, so extract its RawMem input
// (unless the MergeMem has been optimized away).

// Search for the MemBarAcquireLock node and delete it as well.

// Delete the FastLock node as well if this Lock node is its unique user
// (loop peeling may clone a Lock node).

// Search for the MemBarReleaseLock node and delete it as well.

//------------------------------expand_lock_node----------------------
/*
 * See the full description in MacroAssembler::biased_locking_enter().
 *
 * if( (mark_word & biased_lock_mask) == biased_lock_pattern ) {
 *   // The object is biased.
 *   proto_node = klass->prototype_header;
 *   o_node = thread | proto_node;
 *   x_node = o_node ^ mark_word;
 *   if( (x_node & ~age_mask) == 0 ) { // Biased to the current thread ?
 *     ...
 *   } else if( (x_node & biased_lock_mask) != 0 ) {
 *     // The klass's prototype header is no longer biased.
 *     cas(&mark_word, mark_word, proto_node)
 *     ...
 *   } else {
 *     // The klass's prototype header is still biased.
 *     if( (x_node & epoch_mask) != 0 ) { // Expired epoch?
 *       ...
 *     } else {
 *       // Different thread or anonymous biased.
 *       old = mark_word & (epoch_mask | age_mask | biased_lock_mask);
 *       ...
 *     }
 *     if( cas(&mark_word, old, new) == 0 ) {
 *       ...
 *     } else {
 *       goto slow_path; // Failed.
 *     }
 *   }
 * } else {
 *   // The object is not biased.
 *   if( FastLock(obj) == 0 ) {
 *     ...
 *   } else {
 *     slow_path:
 *     OptoRuntime::complete_monitor_locking_Java(obj);
 *   }
 * }
 */

// Create a Phi for the memory state.

// First, check the mark word for the biased lock pattern.

// Get the fast path - the mark word has the biased lock pattern.

// fast_lock_region->in(1) is set to the slow path.

// Now check that the lock is biased to the current thread and has
// the same epoch and bias as Klass::_prototype_header.

// Special-case a fresh allocation to avoid building nodes:

// Get the slow path - the mark word does NOT match the value.

// region->in(3) is set to the fast path - the object is biased to the current thread.

// The mark word does NOT match the value (thread | Klass::_prototype_header).

// First, check the biased pattern.
// Get the fast path - _prototype_header has the same biased lock pattern.

// fast_lock_region->in(2) - the prototype header is no longer biased
// and we have to revoke the bias on this object.
// We are going to try to reset the mark of this object to the prototype
// value and fall through to the CAS-based locking scheme.

// Second, check the epoch bits.

// Get the slow path - the mark word does NOT match the epoch bits.

// The epoch of the current bias is not valid; attempt to rebias the object
// toward the current thread.

// rebiased_region->in(1) is set to the fast path.

// The epoch of the current bias is still valid, but we know
// nothing about the owner; it might be set or it might be clear.
// Try to acquire the bias of the object using an atomic operation.
// If this fails, we will go in to the runtime to revoke the object's bias.

// Get the slow path - failed to CAS.

// region->in(4) is set to the fast path - the object is rebiased to the current thread.

// Call the CAS-based locking scheme (FastLock node).

// Get the slow path - FastLock failed to lock the object.

// region->in(2) is set to the fast path - the object is locked to the current thread.

// Reset the lock's memory edge.

// Create a Phi for the memory state.

// Optimize the test; set region slot 2.
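// --------------------------------------------------------------------------
// Illustrative sketch (standalone, not VM code): the mark-word tests from
// the biased-locking pseudocode above, in plain C++. The mask/pattern
// constants are hypothetical stand-ins for the markOop bit layout, and the
// epoch/revocation subcases are collapsed into a single CAS; only the
// test-then-CAS shape is the point.
#include <atomic>
#include <cstdint>

static const uintptr_t kBiasedLockMask    = 0x7;   // low mark-word bits
static const uintptr_t kBiasedLockPattern = 0x5;   // "biased" bit pattern
static const uintptr_t kAgeMask           = 0x78;  // age bits (stand-in)

extern void slow_path_lock(std::atomic<uintptr_t>* mark_word);

void fast_lock(std::atomic<uintptr_t>* mark_word, uintptr_t thread,
               uintptr_t proto_header) {
  uintptr_t mark = mark_word->load(std::memory_order_relaxed);
  if ((mark & kBiasedLockMask) == kBiasedLockPattern) {
    // The object is biased: xor against (thread | prototype header);
    // if only the age bits differ, it is already biased to this thread.
    uintptr_t x = (thread | proto_header) ^ mark;
    if ((x & ~kAgeMask) == 0) {
      return;                        // biased to the current thread: done
    }
    // Try to (re)bias with one CAS; on failure, revoke in the runtime.
    uintptr_t desired = thread | proto_header;
    if (mark_word->compare_exchange_strong(mark, desired)) {
      return;                        // rebiased to the current thread
    }
  }
  slow_path_lock(mark_word);         // CAS-based or inflated locking
}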
// The slow path can only throw asynchronous exceptions, which are always
// de-opted. So the compiler thinks the slow call can never throw an
// exception. If it DOES throw an exception, we would need the debug
// info removed first (since if it throws there is no monitor).

// Disconnect the fall-through projection from the call and create a new one.

// Hook up users of the fall-through projection to the region.

// Region inputs are now complete.

//------------------------------expand_unlock_node----------------------

// No need for a null check on unlock.

// Check for the biased locking unlock case, which is a no-op.
// See the full description in MacroAssembler::biased_locking_exit().

// Create a Phi for the memory state.

// Create a Phi for the memory state.

// Optimize the test; set region slot 2.

// No exceptions for unlocking.

// Disconnect the fall-through projection from the call and create a new one.

// Hook up users of the fall-through projection to the region.

// Region inputs are now complete.

//------------------------------expand_macro_nodes----------------------
// Returns true if a failure occurred.

// First, attempt to eliminate locks.
for (int i = 0; i < cnt; i++) {
// Before elimination, mark all associated (same box and obj)
// lock and unlock nodes.

// Remove it from the macro list and put it on the IGVN worklist to optimize.

// Next, attempt to eliminate allocations.

assert(false, "unknown node type in macro list");
// Make sure expansion will not cause the node limit to be exceeded.
// The worst case is a macro node getting expanded into about 50 nodes.
// Allow 50% more for optimization.

// Nodes are removed from the macro list as they are processed.

// The node is unreachable, so don't try to expand it.

assert(false, "unknown node type in macro list");