macro.cpp revision 4505
/*
 * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

// Replace any references to "oldref" in inputs to "use" with "newref".
// Returns the number of replacements made.

// Copy debug information and adjust JVMState information
// Clone old SafePointScalarObjectNodes, adjusting their field contents.

// Fast path not-taken, i.e. slow path

//--------------------copy_predefined_input_for_runtime_call--------------------
// Set fixed predefined input arguments

//------------------------------make_slow_call---------------------------------
// Slow path call has no side-effects, uses few values

// For Control (fallthrough) and I_O (catch_all_index) we have CatchProj -> Catch -> Proj
assert(false, "unexpected projection from allocation node.");
// Eliminate a card mark sequence.  p2x is a ConvP2XNode
// The load is checking if the card has been written, so
// replace it with zero to fold the test.

// It could have only one user, a URShift node, in the Object.clone() intrinsic,
// but the new allocation is passed to the arraycopy stub and it cannot
// be scalar replaced, so we don't check that case.
// Another case of only one user (Xor) is when the value check for NULL
// in the G1 post barrier is folded after CCP, so the code which used URShift
// is removed as well.

// Take the Region node before eliminating the post barrier since it also
// eliminates the CastP2X node when it has only one user.

// Search for the CastP2X->Xor->URShift->Cmp path which
// checks if the store was done to a region different from the value's region,
// and replace Cmp with #0 (false) to collapse the G1 post barrier.
assert(..., "missing region check in G1 post barrier");
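//--------------------illustrative sketch: G1 cross-region filter----------------------
// The CastP2X->Xor->URShift->Cmp pattern searched for above is the node form of
// the cross-region filter in the G1 post barrier: a card only needs to be
// dirtied when the store address and the stored value lie in different heap
// regions.  A hedged C++ sketch of that test follows; log_region_bytes is an
// illustrative stand-in for the log2 of the G1 region size, not the actual
// HotSpot symbol.
#include <stdint.h>

static bool crosses_region_sketch(const void* store_addr, const void* new_val,
                                  unsigned log_region_bytes) {
  uintptr_t a = (uintptr_t) store_addr;
  uintptr_t b = (uintptr_t) new_val;
  // XOR then unsigned shift: the result is non-zero only when the two
  // addresses fall in different regions.  The macro expansion above replaces
  // the Cmp with #0 (false) when it knows the barrier is not needed, which
  // folds this whole test away.
  return ((a ^ b) >> log_region_bytes) != 0;
}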
// Search for the "if (marking != 0)" check and set it to "false".
// There is no G1 pre barrier if the previously stored value is NULL
// (for example, after initialization).

// Now CastP2X can be removed since it is used only on a dead path
// which is currently still alive until igvn optimizes it.

// Search for a memory operation for the specified memory slice.
// We can safely skip over safepoints, calls, locks and membars because we
// already know that the object is safe to eliminate.

// Array element references have the same alias_idx
// but different offset and different instance_id.

// Cannot bypass initialization of the instance
// We are looking for the stored value; return the Initialize node
// or the memory edge from the Allocate node.
// Otherwise skip it (the call updated the 'mem' value).
assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
// Given a Memory Phi, compute a value Phi containing the values from stores
// on the input paths.
// Note: this function is recursive; its depth is limited by the "level" argument.
// Returns the computed Phi, or NULL if it cannot compute it.

// Check if an appropriate value phi already exists.
// Check if an appropriate new value phi already exists.
// create a new Phi for the value

// hit a sentinel, return appropriate 0 value

return NULL;   // can't find a value on this path

assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
// Search for the last value stored into the object's field.

done = true;   // hit a sentinel, return appropriate 0 value
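//--------------------illustrative sketch: searching for a stored value----------------------
// The searches described above walk backwards through the memory graph to find
// the last value stored into a field of the non-escaping object: plain memory
// states are skipped (safepoints, calls, locks, membars), and memory Phis are
// handled recursively up to the "level" limit.  The sketch below is a generic,
// hedged illustration of that search; every type and field here is a
// hypothetical stand-in, not a real C2 class.
#include <cstddef>
#include <vector>

struct Value;                           // the stored value being looked for

struct MemState {
  bool                   is_phi;        // memory Phi merging several paths
  std::vector<MemState*> phi_inputs;    // inputs of the memory Phi
  Value*                 stored;        // value stored into the field here, if any
  MemState*              prior;         // previous memory state on this path
};

// Returns the value stored into the field along 'mem', or NULL if it cannot
// be determined within 'level' nested Phis (the recursion budget).
static Value* value_from_mem_sketch(MemState* mem, int level) {
  while (mem != NULL) {
    if (mem->is_phi) {
      if (level <= 0) return NULL;                        // recursion too deep
      Value* merged = NULL;
      for (std::size_t i = 0; i < mem->phi_inputs.size(); i++) {
        Value* v = value_from_mem_sketch(mem->phi_inputs[i], level - 1);
        if (v == NULL) return NULL;                       // can't find a value on this path
        if (merged == NULL) merged = v;                   // the real code builds a value Phi here
      }
      return merged;
    }
    if (mem->stored != NULL) return mem->stored;          // found the last store
    mem = mem->prior;                                     // skip safepoints, calls, membars, ...
  }
  return NULL;                                            // ran off the start of the chain
}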
// try to find a phi's unique input
// hit a sentinel, return appropriate 0 value

// attempt to produce a Phi reflecting the values on the input paths of the Phi

// Check the possibility of scalar replacement.
// Scan the uses of the allocation to check for anything that would
// prevent us from eliminating it.
// All users were eliminated.

// Object is passed as argument.

// find the fields of the class which will be needed for safepoint debug information
// find the array's elements which will be needed for safepoint debug information

// Process the safepoint uses
// Scan object's fields adding an input to the safepoint for each field.
// The next code is taken from Parse::do_get_xxx().
// This can happen if the constant oop is non-perm.
// Do not "join" in the previous type; it doesn't add value,
// and may yield a vacuous result if the field is of interface type.

// We weren't able to find a value for this field,
// give up on eliminating this allocation.

// Remove any extra entries we added to the safepoint.
for (int k = 0; k < j; k++) {
// rollback processed safepoints
// remove any extra entries we added to the safepoint

// Now make a pass over the debug information replacing any references
// to SafePointScalarObjectNode with the allocated object.
} else {
  // Array's element
  tty->print("=== At SafePoint node %d can't find value of array element [%d]",
// Enable the "DecodeN(EncodeP(Allocate)) --> Allocate" transformation
// to be able to scalar replace the allocation.

// Now make a pass over the debug information replacing any references
// to the allocated object with "sobj"

// Process users of eliminated allocation.
// Verify that there are no dependent MemBarVolatile nodes;
// they should be removed during IGVN, see MemBarNode::Ideal().
assert(..., "MemBarVolatile should be eliminated for non-escaping object");
// Process other users of allocation's projections

// Eliminate Initialize node.
// raw memory addresses used only by the initialization

//---------------------------set_eden_pointers-------------------------
} else {
  // Shared allocation: load from globals

//=============================================================================
// Allocation attempts to be fast in the case of frequent small objects.
// It breaks down like this:
//
// 1) Size in doublewords is computed.  This is a constant for objects and
// variable for most arrays.  Doubleword units are used to avoid size
// overflow of huge doubleword arrays.  We need doublewords in the end for
// rounding.
//
// 2) Size is checked for being 'too large'.  Too-large allocations will go
// the slow path into the VM.  The slow path can throw any required
// exceptions, and does all the special checks for very large arrays.  The
// size test can constant-fold away for objects.  For objects with
// finalizers it constant-folds the other way: you always go slow with
// finalizers.
//
// 3) If NOT using TLABs, this is the contended loop-back point.
// Load-Locked the heap top.  If using TLABs normal-load the heap top.
//
// 4) Check that heap top + size*8 < max.  If we fail, go the slow route.
// NOTE: "top+size*8" cannot wrap the 4Gig line!  Here's why: for largish
// "size*8" we always enter the VM, where "largish" is a constant picked small
// enough that there's always space between the eden max and 4Gig (old space is
// there so it's quite large) and large enough that the cost of entering the VM
// is dwarfed by the cost to initialize the space.
//
// 5) If NOT using TLABs, Store-Conditional the adjusted heap top back
// down.  If contended, repeat at step 3.  If using TLABs normal-store
// adjusted heap top back down; there is no contention.
//
// 6) Clear the object body if necessary (unless ZeroTLAB already did) and
// fill in the klass & mark fields.
//
// 7) Merge with the slow-path; cast the raw memory pointer to the correct
// oop type.
//
// (A hedged C++ sketch of this fast path is included a little further below.)
//=============================================================================

// FastAllocateSizeLimit value is in DOUBLEWORDS.
// Allocations bigger than this always go the slow route.
// This value must be small enough that allocation attempts that need to
// trigger exceptions go the slow route.  Also, it must be small enough so
// that heap_top + size_in_bytes does not wrap around the 4Gig limit.

//=============================================================================
// The allocator will coalesce int->oop copies away.  See comment in
// coalesce.cpp about how this works.  It depends critically on the exact
// code shape produced here, so if you are changing this code shape
// make sure the GC info for the heap-top is correct in and around the
// slow-path call.

// We need a Region and corresponding Phi's to merge the slow-path and fast-path results.
// they will not be used if "always_slow" is set

// The initial slow comparison is a size check; the comparison
// we want to do is a BoolTest::gt

// Force slow-path allocation

// generate the initial test if necessary
// Now make the initial failure test.  Usually a too-big test but
// might be a TRUE for finalizers or a fancy class check for ...
// Plug the failing-too-big test into the slow-path region
} else {
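//--------------------illustrative sketch: allocation fast path----------------------
// A hedged C++ sketch of the fast path described in steps 1) through 7) above.
// Every name here (heap_top, heap_end, slow_allocate, the size limit) is an
// illustrative stand-in: the real code is emitted as ideal-graph nodes, works
// on a TLAB or uses load-locked/store-conditional, and also issues prefetches.
#include <atomic>
#include <cstddef>
#include <cstring>

extern std::atomic<char*> heap_top;                 // current allocation pointer (illustrative)
extern char*              heap_end;                 // end of eden/TLAB (illustrative)
extern void* slow_allocate(size_t size_in_bytes);   // slow path into the VM (illustrative)

static const size_t kSizeLimitDoublewords = 0x20000;  // stands in for FastAllocateSizeLimit

static void* allocate_sketch(size_t size_in_doublewords) {
  size_t size_in_bytes = size_in_doublewords * 8;    // 1) size is kept in doublewords
  if (size_in_doublewords > kSizeLimitDoublewords) {
    return slow_allocate(size_in_bytes);             // 2) too large (or finalizable): slow path
  }
  for (;;) {
    char* old_top = heap_top.load();                 // 3) load the heap top
    char* new_top = old_top + size_in_bytes;         // 4) bump and compare against the end
    if (new_top >= heap_end) {
      return slow_allocate(size_in_bytes);           //    not enough space: needs a GC
    }
    // 5) store the adjusted top back down; with a TLAB a plain store suffices,
    //    otherwise contention loops back to step 3).
    if (heap_top.compare_exchange_weak(old_top, new_top)) {
      memset(old_top, 0, size_in_bytes);             // 6) clear the body, then set mark & klass
      return old_top;                                // 7) caller casts the raw memory to the oop type
    }
  }
}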
  // No initial test, just fall into next case

// generate the fast allocation code unless we know that the initial test will always go slow
// Fast path modifies only raw memory.

// Load Eden::end.  Loop invariant and hoisted.
//
// Note: We set the control input on "eden_end" and "old_eden_top" when using
// a TLAB to work around a bug where these values were being moved across
// a safepoint.  These are not oops, so they cannot be included in the oop
// map, but they can be changed by a GC.  The proper way to fix this would
// be to set the raw memory state when generating a SafepointNode.  However
// this will require extensive changes to the loop optimization in order to
// prevent a degradation of the optimization.

// allocate the Region and Phi nodes for the result
// We need a Region for the loop-back contended case.

// Now handle the passing-too-big test.  We fall into the contended
// loop-back merge point.

// Load(-locked) the heap top.
// See note above concerning the control input when using a TLAB

// Add to heap top to get a new heap top
// Check for needing a GC; compare against heap end

// Plug the failing-heap-space-need-gc test into the slow-path region
// This completes all paths into the slow merge point
} else {
  // No initial slow path needed!
// Just fall from the need-GC path straight into the VM call.

// No need for a GC.  Setup for the Store-Conditional
// Grab regular I/O before optional prefetch may change it.
// Slow-path does no I/O so just set it to the original I/O.
// Name successful fast-path variables

// Store (-conditional) the modified eden top back down.
// StorePConditional produces flags for a test PLUS a modified raw
// memory state.

// If not using TLABs, check to see if there was contention.
// If contention, loopback and try again.
// Fast-path succeeded with no contention!

// Bump total allocated bytes for this thread

// If initialization is performed by an array copy, any required
// MemBarStoreStore was already added.  If the object does not
// escape, no need for a MemBarStoreStore.  Otherwise we need a
// MemBarStoreStore so that stores that initialize this object
// can't be reordered with a subsequent store that makes this
// object accessible by other threads.

// No InitializeNode or no stores captured by zeroing
// elimination.  Simply add the MemBarStoreStore after object
// initialization.

// Add the MemBarStoreStore after the InitializeNode so that
// all stores performing the initialization that were moved
// before the InitializeNode happen before the storestore
// barrier.

// The MemBarStoreStore depends on control and memory coming
// from the InitializeNode.
// All nodes that depended on the InitializeNode for control
// and memory must now depend on the MemBarNode that itself
// depends on the InitializeNode

// Get base of thread-local storage area

// Plug in the successful fast-path into the result merge point

// Copy debug information and adjust JVMState information, then replace
// the allocate node with the call

// Hook i_o projection to avoid its elimination during allocation
// replacement (when only a slow call is generated).

// Identify the output projections from the allocate node and
// adjust any references to them.
// The control and io projections look like:
//        v---Proj(ctrl) <-----+   v---CatchProj(ctrl)
//  Allocate                   Catch
//        ^---Proj(io)    <-------+ ^---CatchProj(io)
// We are interested in the CatchProj nodes.

// An allocate node has separate memory projections for the uses on
// the control and i_o paths.  Replace the control memory projection with
// result_phi_rawmem (unless we are only generating a slow call when
// both memory projections are combined)

// Now change uses of _memproj_catchall to use _memproj_fallthrough and delete
// _memproj_catchall so we end up with a call that has only 1 memory projection.

// An allocate node has separate i_o projections for the uses on the control
// and i_o paths.  Always replace the control i_o projection with result i_o,
// otherwise incoming i_o becomes dead when only a slow call is generated
// (it is different from memory projections where both projections are
// combined in such a case).

// Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete
// _ioproj_catchall so we end up with a call that has only 1 i_o projection.

// if we generated only a slow call, we are done
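//--------------------illustrative sketch: why the MemBarStoreStore is needed----------------------
// The MemBarStoreStore discussion above is the ideal-graph version of a common
// publication requirement: the stores that initialize an object must not be
// reordered past the store that makes the object reachable by other threads.
// A hedged, self-contained C++ illustration of the same requirement follows;
// the names are illustrative and this is not the code C2 emits.
#include <atomic>

struct Point { int x; int y; };

std::atomic<Point*> g_shared(0);         // illustrative shared slot

void publish_point_sketch(int x, int y) {
  Point* p = new Point;
  p->x = x;                              // initializing stores...
  p->y = y;
  // ...must happen before the store that publishes 'p'.  The release store
  // plays the role of the MemBarStoreStore discussed above; without it, a
  // weakly-ordered machine could make the uninitialized object visible.
  g_shared.store(p, std::memory_order_release);
}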
// Case of new array with negative size known during compilation.
// The AllocateArrayNode::Ideal() optimization disconnects the unreachable
// code following the call, since the call to the runtime will throw an exception.
// As a result there will be no users of i_o after the call.
// Leave i_o attached to this call to avoid problems in the preceding graph.

// no uses of the allocation result

// Plug slow-path into result merge point
// This completes all paths into the result merge point

// Helper for PhaseMacroExpand::expand_allocate_common.
// Initializes the newly-allocated storage.

// Store the klass & mark bits
// For now only enable fast locking for non-array types
// conservatively small header size:

// Clear the object body, if necessary.
// The init has somehow disappeared; be cautious and clear everything.
//
// This can happen if a node is allocated but an uncommon trap occurs
// immediately.  In this case, the Initialize gets associated with the
// trap, and may be placed in a different (outer) loop, if the Allocate
// is in a loop.  If (this is rare) the inner loop gets unrolled, then
// there can be two Allocates to one Initialize.  The answer in all these
// edge cases is safety first.  It is always safe to clear immediately
// within an Allocate, and then (maybe or maybe not) clear some more later.

// Try to win by zeroing only what the init does not store.
// We can also try to do some peephole optimizations,
// such as combining some adjacent subword stores.

// We have no more use for this link, since the AllocateNode goes away:
// (If we keep the link, it just confuses the register allocator,
// who thinks he sees a real use of the address by the membar.)

// Generate prefetch instructions for next allocations.

// Generate prefetch allocation with watermark check.
// As an allocation hits the watermark, we will prefetch starting
// at a "distance" away from the watermark.

// I/O is used for Prefetch

// check against new_eden_top
// true node, add prefetchdistance

// Insert a prefetch for each allocation.
// This code is used for Sparc with BIS.
// Generate several prefetch instructions.

// Insert a prefetch for each allocation only on the fast-path
// Generate several prefetch instructions.
// Do not let it float too high, since if eden_top == eden_end,
// both might be null.
if( i == 0 ) {
  // Set control for first prefetch, next follows it

// Don't zero the type array during slow allocation in the VM since
// it will be initialized later by arraycopy in compiled code.

//-------------------mark_eliminated_box----------------------------------
// During EA, obj may point to several objects, but after a few ideal graph
// transformations (CCP) it may point to only one non-escaping object
// (though still through a phi), and the corresponding locks and unlocks
// will then be marked for elimination.  Later obj could be replaced with a
// new node (a new phi) which does not have escape information.  And later,
// after some graph reshaping, other locks and unlocks (which were not marked
// for elimination before) are connected to this new obj (phi), but they
// still will not be marked for elimination since the new obj has no escape
// information.
// Mark all associated (same box and obj) lock and unlock nodes for
// elimination if some of them were marked already.

return;   // This BoxLock node was processed already.

// New implementation (EliminateNestedLocks) has a separate BoxLock
// node for each locked region, so mark all associated locks/unlocks as
// eliminated even if different objects are referenced in one locked region
// (for example, OSR compilation of a nested loop inside a locked scope).

// Box is used only in one lock region.  Mark this box as eliminated.

// Check lock's box since the box could be referenced by Lock's debug info.
// Mark all related locks and unlocks as eliminated.

// Create a new "eliminated" BoxLock node and use it in monitor debug info
// instead of oldbox for the same object.
// Note: the BoxLock node is marked eliminated only here and it is used
// to indicate that all associated lock and unlock nodes are marked
// for elimination.

// Replace the old box node with the new box for all users of the same object.
// Replace the Box and mark all related locks and unlocks as eliminated.
// Replace the old box in monitor debug info.

//-----------------------mark_eliminated_locking_nodes-----------------------
// Only the Lock node has the JVMState needed here.

// Mark related nested locks and unlocks as eliminated.
// Note: the BoxLock node is marked eliminated only here
// and it is used to indicate that all associated lock
// and unlock nodes are marked for elimination.

// Verify that this Box is referenced only by related locks.
// Mark all related locks and unlocks.

// Process locks for a non-escaping object
}
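//--------------------illustrative sketch: marking associated locks----------------------
// A hedged, generic sketch of the "mark associated locks/unlocks" idea
// described above: if any lock or unlock on the same box+object pair is
// already marked for elimination, the whole group is marked.  All types here
// are hypothetical stand-ins, not the C2 node classes.
#include <cstddef>
#include <vector>

struct LockUse {
  void* box;          // the BoxLock this lock/unlock uses
  void* obj;          // the locked object
  bool  eliminated;   // already marked for elimination?
};

static void mark_associated_locks_sketch(std::vector<LockUse*>& locks,
                                         void* box, void* obj) {
  bool any_marked = false;
  for (std::size_t i = 0; i < locks.size(); i++) {
    if (locks[i]->box == box && locks[i]->obj == obj && locks[i]->eliminated) {
      any_marked = true;   // some lock in this group was already proven removable
      break;
    }
  }
  if (!any_marked) return;
  // Mark every lock and unlock that uses the same box and object.
  for (std::size_t i = 0; i < locks.size(); i++) {
    if (locks[i]->box == box && locks[i]->obj == obj) {
      locks[i]->eliminated = true;
    }
  }
}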
// EliminateNestedLocks

// Look for all locks of this object and mark them and the
// corresponding BoxLock nodes as eliminated.

// Replace the old box node with a new eliminated box for all users
// of the same object and mark related locks as eliminated.

// We have determined that this lock/unlock can be eliminated; we simply
// eliminate the node without expanding it.
// Note: the membars associated with the lock/unlock are currently not
// eliminated.  This should be investigated as a future enhancement.

// Check that the new "eliminated" BoxLock node is created.
log->head("eliminate_lock lock='%d'",
// There are 2 projections from the lock.  The lock node will
// be deleted when its last use is subsumed below.
// The memory projection from a lock/unlock is RawMem
// The input to a Lock is merged memory, so extract its RawMem input
// (unless the MergeMem has been optimized away.)

// Search for the MemBarAcquireLock node and delete it also.
// Delete the FastLock node also if this Lock node is its unique user
// (a loop peeling may clone a Lock node).
// Search for the MemBarReleaseLock node and delete it also.

//------------------------------expand_lock_node----------------------
/*
 * See the full description in MacroAssembler::biased_locking_enter().
 *
 * if( (mark_word & biased_lock_mask) == biased_lock_pattern ) {
 *    // The object is biased.
 *    proto_node = klass->prototype_header;
 *    o_node = thread | proto_node;
 *    x_node = o_node ^ mark_word;
 *    if( (x_node & ~age_mask) == 0 ) { // Biased to the current thread ?
 *    if( (x_node & biased_lock_mask) != 0 ) {
 *      // The klass's prototype header is no longer biased.
 *      cas(&mark_word, mark_word, proto_node)
 *      // The klass's prototype header is still biased.
 *      if( (x_node & epoch_mask) != 0 ) { // Expired epoch?
 *        // Different thread or anonymous biased.
 *        old = mark_word & (epoch_mask | age_mask | biased_lock_mask);
 *        if( cas(&mark_word, old, new) == 0 ) {
 *          goto slow_path; // Failed.
 *    // The object is not biased.
 *    if( FastLock(obj) == 0 ) {
 *      OptoRuntime::complete_monitor_locking_Java(obj);
 */

// create a Phi for the memory state

// First, check the mark word for the biased lock pattern.
// Get fast path - mark word has the biased lock pattern.
// fast_lock_region->in(1) is set to slow path.

// Now check that the lock is biased to the current thread and has
// the same epoch and bias as Klass::_prototype_header.
// Special-case a fresh allocation to avoid building nodes:
// Get slow path - mark word does NOT match the value.
// region->in(3) is set to fast path - the object is biased to the current thread.

// Mark word does NOT match the value (thread | Klass::_prototype_header).
// First, check the biased pattern.
// Get fast path - _prototype_header has the same biased lock pattern.

// fast_lock_region->in(2) - the prototype header is no longer biased
// and we have to revoke the bias on this object.
// We are going to try to reset the mark of this object to the prototype
// value and fall through to the CAS-based locking scheme.

// Second, check the epoch bits.
// Get slow path - mark word does NOT match epoch bits.
// The epoch of the current bias is not valid; attempt to rebias the object
// toward the current thread.
// rebiased_region->in(1) is set to fast path.

// The epoch of the current bias is still valid but we know
// nothing about the owner; it might be set or it might be clear.
// Try to acquire the bias of the object using an atomic operation.
// If this fails we will go into the runtime to revoke the object's bias.
// Get slow path - Failed to CAS.
// region->in(4) is set to fast path - the object is rebiased to the current thread.

// Call CAS-based locking scheme (FastLock node).
// Get slow path - FastLock failed to lock the object.
// region->in(2) is set to fast path - the object is locked to the current thread.

// Reset lock's memory edge.
// create a Phi for the memory state
// Optimize test; set region slot 2

// Slow path can only throw asynchronous exceptions, which are always
// de-opted.  So the compiler thinks the slow-call can never throw an
// exception.
// If it DOES throw an exception we would need the debug
// info removed first (since if it throws there is no monitor).

// disconnect fall-through projection from call and create a new one
// hook up users of fall-through projection to region
// region inputs are now complete

//------------------------------expand_unlock_node----------------------
// No need for a null check on unlock

// Check for the biased locking unlock case, which is a no-op.
// See the full description in MacroAssembler::biased_locking_exit().

// create a Phi for the memory state
// create a Phi for the memory state
// Optimize test; set region slot 2

// No exceptions for unlocking
// disconnect fall-through projection from call and create a new one
// hook up users of fall-through projection to region
// region inputs are now complete

//---------------------------eliminate_macro_nodes----------------------
// Eliminate scalar replaced allocations and associated locks.
// First, attempt to eliminate locks
for (int i = 0; i < cnt; i++) {
  // Before elimination mark all associated (same box and obj)
  // lock and unlock nodes.

// Next, attempt to eliminate allocations

//------------------------------expand_macro_nodes----------------------
// Returns true if a failure occurred.
// Last attempt to eliminate macro nodes.

// Make sure expansion will not cause the node limit to be exceeded.
// Worst case is a macro node gets expanded into about 50 nodes.
// Allow 50% more for optimization.

// Eliminate Opaque and LoopLimit nodes.  Do it after all loop optimizations.
// Remove it from the macro list and put it on the IGVN worklist to optimize.

// nodes are removed from the macro list as they are processed
// node is unreachable, so don't try to expand it
assert(false, "unknown node type in macro list");