macro.cpp revision 708
2362N/A * Copyright 2005-2009 Sun Microsystems, Inc. All Rights Reserved. 0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 0N/A * This code is free software; you can redistribute it and/or modify it 0N/A * under the terms of the GNU General Public License version 2 only, as 2362N/A * published by the Free Software Foundation. 2362N/A * This code is distributed in the hope that it will be useful, but WITHOUT 0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 0N/A * version 2 for more details (a copy is included in the LICENSE file that 0N/A * accompanied this code). 0N/A * You should have received a copy of the GNU General Public License version 0N/A * 2 along with this work; if not, write to the Free Software Foundation, 0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 0N/A * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 0N/A * CA 95054 USA or visit www.sun.com if you need additional information or 0N/A#
include "incls/_precompiled.incl" 0N/A// Replace any references to "oldref" in inputs to "use" with "newref". 0N/A// Returns the number of replacements made. 0N/A // Copy debug information and adjust JVMState information 0N/A // Clone old SafePointScalarObjectNodes, adjusting their field contents. 0N/A // Fast path not-taken, i.e. slow path 0N/A//--------------------copy_predefined_input_for_runtime_call-------------------- 0N/A // Set fixed predefined input arguments 0N/A//------------------------------make_slow_call--------------------------------- 0N/A // Slow path call has no side-effects, uses few values 0N/A // For Control (fallthrough) and I_O (catch_all_index) we have CatchProj -> Catch -> Proj 0N/A assert(
false,
"unexpected projection from allocation node.");
0N/A// Eliminate a card mark sequence. p2x is a ConvP2XNode 0N/A// Search for a memory operation for the specified memory slice. 0N/A return mem;
// hit one of our sentinels 0N/A // we can safely skip over safepoints, calls, locks and membars because we 0N/A // already know that the object is safe to eliminate. 0N/A // Array elements references have the same alias_idx 0N/A // but different offset and different instance_id. 0N/A assert(
false,
"Object is not scalar replaceable if a LoadStore node access its field");
0N/A// Given a Memory Phi, compute a value Phi containing the values from stores 0N/A// on the input paths. 0N/A// Note: this function is recursive, its depth is limited by the "level" argument 0N/A// Returns the computed Phi, or NULL if it cannot compute it. 0N/A // Check if an appropriate value phi already exists. 0N/A // Check if an appropriate new value phi already exists. 0N/A return NULL;
// Give up: phi tree too deep 0N/A // create a new Phi for the value 0N/A // hit a sentinel, return appropriate 0 value 0N/A return NULL;
// can't find a value on this path 0N/A assert(
false,
"Object is not scalar replaceable if a LoadStore node access its field");
0N/A return NULL;
// unknown node on this path 0N/A// Search the last value stored into the object's field. 0N/A return NULL;
// found a loop, give up 0N/A done =
true;
// hit a sentinel, return appropriate 0 value 0N/A done =
true;
// Something went wrong. 0N/A // try to find a phi's unique input 0N/A // hit a sentinel, return appropriate 0 value 0N/A // attempt to produce a Phi reflecting the values on the input paths of the Phi 0N/A // Kill all new Phis 0N/A // Something went wrong. 0N/A// Check the possibility of scalar replacement. 0N/A // Scan the uses of the allocation to check for anything that would 0N/A // prevent us from eliminating it. 0N/A // All users were eliminated. 0N/A // Object is passed as argument. 0N/A// Do scalar replacement. 0N/A if (
res !=
NULL) {
// Could be NULL when there are no users 0N/A // find the fields of the class which will be needed for safepoint debug information 0N/A // find the array's elements which will be needed for safepoint debug information 0N/A // Process the safepoint uses 0N/A // Scan object's fields adding an input to the safepoint for each field. 0N/A // The next code is taken from Parse::do_get_xxx(). 0N/A // This can happen if the constant oop is non-perm. 0N/A // Do not "join" in the previous type; it doesn't add value, 3813N/A // and may yield a vacuous result if the field is of interface type. 0N/A // we weren't able to find a value for this field, 0N/A // give up on eliminating this allocation 0N/A // remove any extra entries we added to the safepoint 0N/A for (
int k = 0; k < j; k++) {
0N/A // rollback processed safepoints 0N/A // remove any extra entries we added to the safepoint 3813N/A // Now make a pass over the debug information replacing any references 3813N/A // to SafePointScalarObjectNode with the allocated object. 0N/A }
else {
// Array's element 0N/A tty->
print(
"=== At SafePoint node %d can't find value of array element [%d]",
3813N/A // Enable "DecodeN(EncodeP(Allocate)) --> Allocate" transformation 0N/A // to be able to scalar replace the allocation. 0N/A // Now make a pass over the debug information replacing any references 0N/A // to the allocated object with "sobj" 0N/A// Process users of eliminated allocation. 0N/A // Process other users of allocation's projections 0N/A // Eliminate Initialize node. 0N/A // raw memory addresses used only by the initialization 0N/A assert(
false,
"only Initialize or AddP expected");
0N/A//---------------------------set_eden_pointers------------------------- 3813N/A }
else {
// Shared allocation: load from globals 0N/A//============================================================================= 0N/A// A L L O C A T I O N 0N/A// Allocation attempts to be fast in the case of frequent small objects. 0N/A// It breaks down like this: 0N/A// 1) Size in doublewords is computed. This is a constant for objects and 0N/A// variable for most arrays. Doubleword units are used to avoid size 0N/A// overflow of huge doubleword arrays. We need doublewords in the end for 0N/A// 2) Size is checked for being 'too large'. Too-large allocations will go 0N/A// the slow path into the VM. The slow path can throw any required 0N/A// exceptions, and does all the special checks for very large arrays. The 0N/A// size test can constant-fold away for objects. For objects with 0N/A// finalizers it constant-folds the otherway: you always go slow with 0N/A// 3) If NOT using TLABs, this is the contended loop-back point. 0N/A// Load-Locked the heap top. If using TLABs normal-load the heap top. 0N/A// 4) Check that heap top + size*8 < max. If we fail go the slow ` route. 0N/A// NOTE: "top+size*8" cannot wrap the 4Gig line! Here's why: for largish 0N/A// "size*8" we always enter the VM, where "largish" is a constant picked small 0N/A// enough that there's always space between the eden max and 4Gig (old space is 0N/A// there so it's quite large) and large enough that the cost of entering the VM 0N/A// is dwarfed by the cost to initialize the space. 0N/A// 5) If NOT using TLABs, Store-Conditional the adjusted heap top back 0N/A// down. If contended, repeat at step 3. If using TLABs normal-store 0N/A// adjusted heap top back down; there is no contention. 0N/A// 6) If !ZeroTLAB then Bulk-clear the object/array. Fill in klass & mark 0N/A// 7) Merge with the slow-path; cast the raw memory pointer to the correct 3813N/A//============================================================================= 3813N/A// FastAllocateSizeLimit value is in DOUBLEWORDS. 
0N/A// Allocations bigger than this always go the slow route. 0N/A// This value must be small enough that allocation attempts that need to 0N/A// trigger exceptions go the slow route. Also, it must be small enough so 0N/A// that heap_top + size_in_bytes does not wrap around the 4Gig limit. 0N/A//=============================================================================j// // The allocator will coalesce int->oop copies away. See comment in // coalesce.cpp about how this works. It depends critically on the exact // code shape produced here, so if you are changing this code shape // make sure the GC info for the heap-top is correct in and around the Node*
length,
// array length for an array allocation // We need a Region and corresponding Phi's to merge the slow-path and fast-path results. // they will not be used if "always_slow" is set // The initial slow comparison is a size check, the comparison // we want to do is a BoolTest::gt // Force slow-path allocation // generate the initial test if necessary // Now make the initial failure test. Usually a too-big test but // might be a TRUE for finalizers or a fancy class check for // Plug the failing-too-big test into the slow-path region }
else {
// No initial test, just fall into next case // generate the fast allocation code unless we know that the initial test will always go slow // Fast path modifies only raw memory. // Load Eden::end. Loop invariant and hoisted. // Note: We set the control input on "eden_end" and "old_eden_top" when using // a TLAB to work around a bug where these values were being moved across // a safepoint. These are not oops, so they cannot be included in the oop // map, but they can be changed by a GC. The proper way to fix this would // be to set the raw memory state when generating a SafepointNode. However // this will require extensive changes to the loop optimization in order to // prevent a degradation of the optimization. // See comment in memnode.hpp, around line 227 in class LoadPNode. // allocate the Region and Phi nodes for the result // We need a Region for the loop-back contended case. // Now handle the passing-too-big test. We fall into the contended // loop-back merge point. // Load(-locked) the heap top. // See note above concerning the control input when using a TLAB // Add to heap top to get a new heap top // Check for needing a GC; compare against heap end // Plug the failing-heap-space-need-gc test into the slow-path region // This completes all paths into the slow merge point }
else {
// No initial slow path needed! // Just fall from the need-GC path straight into the VM call. // No need for a GC. Setup for the Store-Conditional // Grab regular I/O before optional prefetch may change it. // Slow-path does no I/O so just set it to the original I/O. // Store (-conditional) the modified eden top back down. // StorePConditional produces flags for a test PLUS a modified raw // If not using TLABs, check to see if there was contention. // If contention, loopback and try again. // Fast-path succeeded with no contention! // Rename successful fast-path variables to make meaning more obvious // Get base of thread-local storage area // Plug in the successful fast-path into the result merge point // Generate slow-path call // Copy debug information and adjust JVMState information, then replace // allocate node with the call // Identify the output projections from the allocate node and // adjust any references to them. // The control and io projections look like: // v---Proj(ctrl) <-----+ v---CatchProj(ctrl) // ^---Proj(io) <-------+ ^---CatchProj(io) // We are interested in the CatchProj nodes. 
// An allocate node has separate memory projections for the uses on the control and i_o paths // Replace uses of the control memory projection with result_phi_rawmem (unless we are only generating a slow call) // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete _memproj_catchall so // we end up with a call that has only 1 memory projection // An allocate node has separate i_o projections for the uses on the control and i_o paths // Replace uses of the control i_o projection with result_phi_i_o (unless we are only generating a slow call) // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete _ioproj_catchall so // we end up with a call that has only 1 control projection // if we generated only a slow call, we are done // no uses of the allocation result // Plug slow-path into result merge point // This completes all paths into the result merge point // Helper for PhaseMacroExpand::expand_allocate_common. // Initializes the newly-allocated storage. // Store the klass & mark bits // For now only enable fast locking for non-array types // conservatively small header size: if (k->
is_array_klass())
// we know the exact header size in most cases: // Clear the object body, if necessary. // The init has somehow disappeared; be cautious and clear everything. // This can happen if a node is allocated but an uncommon trap occurs // immediately. In this case, the Initialize gets associated with the // trap, and may be placed in a different (outer) loop, if the Allocate // is in a loop. If (this is rare) the inner loop gets unrolled, then // there can be two Allocates to one Initialize. The answer in all these // edge cases is safety first. It is always safe to clear immediately // within an Allocate, and then (maybe or maybe not) clear some more later. // Try to win by zeroing only what the init does not store. // We can also try to do some peephole optimizations, // such as combining some adjacent subword stores. // We have no more use for this link, since the AllocateNode goes away: // (If we keep the link, it just confuses the register allocator, // who thinks he sees a real use of the address by the membar.) // Generate prefetch instructions for next allocations. // Generate prefetch allocation with watermark check. // As an allocation hits the watermark, we will prefetch starting // at a "distance" away from watermark. // I/O is used for Prefetch // check against new_eden_top // true node, add prefetchdistance // Insert a prefetch for each allocation only on the fast-path // Generate several prefetch instructions only for arrays. // Do not let it float too high, since if eden_top == eden_end, if( i == 0 ) {
// Set control for first prefetch, next follows it // we have determined that this lock/unlock can be eliminated, we simply // eliminate the node without expanding it. // Note: The membar's associated with the lock/unlock are currently not // eliminated. This should be investigated as a future enhancement. // Create new "eliminated" BoxLock node and use it // in monitor debug info for the same object. // Replace old box node with new box for all users continue;
// It will be removed below // oldbox could be referenced in debug info also // Replace old box in monitor debug info. }
// if (u->is_SafePoint() }
// for (uint i = 0; i < oldbox->outcnt();) }
// if (!oldbox->is_eliminated()) }
// if (alock->is_Lock() && !lock->is_coarsened()) // There are 2 projections from the lock. The lock node will // be deleted when its last use is subsumed below. // The memory projection from a lock/unlock is RawMem // The input to a Lock is merged memory, so extract its RawMem input // (unless the MergeMem has been optimized away.) // Seach for MemBarAcquire node and delete it also. // Delete FastLock node also if this Lock node is unique user // (a loop peeling may clone a Lock node). // Seach for MemBarRelease node and delete it also. //------------------------------expand_lock_node---------------------- * See the full description in MacroAssembler::biased_locking_enter(). * if( (mark_word & biased_lock_mask) == biased_lock_pattern ) { * // The object is biased. * proto_node = klass->prototype_header; * o_node = thread | proto_node; * x_node = o_node ^ mark_word; * if( (x_node & ~age_mask) == 0 ) { // Biased to the current thread ? * if( (x_node & biased_lock_mask) != 0 ) { * // The klass's prototype header is no longer biased. * cas(&mark_word, mark_word, proto_node) * // The klass's prototype header is still biased. * if( (x_node & epoch_mask) != 0 ) { // Expired epoch? * // Different thread or anonymous biased. * old = mark_word & (epoch_mask | age_mask | biased_lock_mask); * if( cas(&mark_word, old, new) == 0 ) { * goto slow_path; // Failed. * // The object is not biased. * if( FastLock(obj) == 0 ) { * OptoRuntime::complete_monitor_locking_Java(obj); // create a Phi for the memory state // First, check mark word for the biased lock pattern. // Get fast path - mark word has the biased lock pattern. // fast_lock_region->in(1) is set to slow path. // Now check that the lock is biased to the current thread and has // the same epoch and bias as Klass::_prototype_header. // Special-case a fresh allocation to avoid building nodes: // Get slow path - mark word does NOT match the value. 
// region->in(3) is set to fast path - the object is biased to the current thread. // Mark word does NOT match the value (thread | Klass::_prototype_header). // First, check biased pattern. // Get fast path - _prototype_header has the same biased lock pattern. // fast_lock_region->in(2) - the prototype header is no longer biased // and we have to revoke the bias on this object. // We are going to try to reset the mark of this object to the prototype // value and fall through to the CAS-based locking scheme. // Second, check epoch bits. // Get slow path - mark word does NOT match epoch bits. // The epoch of the current bias is not valid, attempt to rebias the object // toward the current thread. // rebiased_region->in(1) is set to fast path. // The epoch of the current bias is still valid but we know // nothing about the owner; it might be set or it might be clear. // Try to acquire the bias of the object using an atomic operation. // If this fails we will go in to the runtime to revoke the object's bias. // Get slow path - Failed to CAS. // region->in(4) is set to fast path - the object is rebiased to the current thread. // Call CAS-based locking scheme (FastLock node). // Get slow path - FastLock failed to lock the object. // region->in(2) is set to fast path - the object is locked to the current thread. // Reset lock's memory edge. // create a Phi for the memory state // Optimize test; set region slot 2 // Slow path can only throw asynchronous exceptions, which are always // de-opted. So the compiler thinks the slow-call can never throw an // exception. If it DOES throw an exception we would need the debug // info removed first (since if it throws there is no monitor). 
// disconnect fall-through projection from call and create a new one // hook up users of fall-through projection to region // region inputs are now complete //------------------------------expand_unlock_node---------------------- // No need for a null check on unlock // Check for biased locking unlock case, which is a no-op. // See the full description in MacroAssembler::biased_locking_exit(). // create a Phi for the memory state // create a Phi for the memory state // Optimize test; set region slot 2 // No exceptions for unlocking // disconnect fall-through projection from call and create a new one // hook up users of fall-through projection to region // region inputs are now complete //------------------------------expand_macro_nodes---------------------- // Returns true if a failure occurred. // First, attempt to eliminate locks // Next, attempt to eliminate allocations assert(
false,
"unknown node type in macro list");
// Make sure expansion will not cause node limit to be exceeded. // Worst case is a macro node gets expanded into about 50 nodes. // Allow 50% more for optimization. // nodes are removed from the macro list as they are processed // node is unreachable, so don't try to expand it assert(
false,
"unknown node type in macro list");