macro.cpp revision 3070
1988N/A * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. 0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 0N/A * This code is free software; you can redistribute it and/or modify it 0N/A * under the terms of the GNU General Public License version 2 only, as 0N/A * published by the Free Software Foundation. 0N/A * This code is distributed in the hope that it will be useful, but WITHOUT 0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 0N/A * version 2 for more details (a copy is included in the LICENSE file that 0N/A * accompanied this code). 0N/A * You should have received a copy of the GNU General Public License version 0N/A * 2 along with this work; if not, write to the Free Software Foundation, 0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1472N/A * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 0N/A// Replace any references to "oldref" in inputs to "use" with "newref". 0N/A// Returns the number of replacements made. 0N/A // Copy debug information and adjust JVMState information 63N/A // Clone old SafePointScalarObjectNodes, adjusting their field contents. 0N/A // Fast path not-taken, i.e. slow path 0N/A//--------------------copy_predefined_input_for_runtime_call-------------------- 0N/A // Set fixed predefined input arguments 0N/A//------------------------------make_slow_call--------------------------------- 0N/A // Slow path call has no side-effects, uses few values 0N/A // For Control (fallthrough) and I_O (catch_all_index) we have CatchProj -> Catch -> Proj 0N/A assert(
false,
"unexpected projection from allocation node.");
73N/A// Eliminate a card mark sequence. p2x is a ConvP2XNode 2379N/A // The load is checking if the card has been written so 2379N/A // replace it with zero to fold the test. 851N/A // It could be only one user, URShift node, in Object.clone() intrinsic 851N/A // but the new allocation is passed to arraycopy stub and it could not 851N/A // be scalar replaced. So we don't check the case. 851N/A // Remove G1 post barrier. 851N/A // Search for CastP2X->Xor->URShift->Cmp path which 851N/A // checks if the store is done to a region different from the value's region. 851N/A // And replace Cmp with #0 (false) to collapse G1 post barrier. 851N/A "missing region check in G1 post barrier");
851N/A // Remove G1 pre barrier. 851N/A // Search "if (marking != 0)" check and set it to "false". 851N/A // There is no G1 pre barrier if previous stored value is NULL 851N/A // (for example, after initialization). 851N/A // Now CastP2X can be removed since it is used only on dead path 851N/A // which currently still alive until igvn optimize it. 73N/A// Search for a memory operation for the specified memory slice. 605N/A return mem;
// hit one of our sentinels 73N/A // we can safely skip over safepoints, calls, locks and membars because we 73N/A // already know that the object is safe to eliminate. 73N/A // Array elements references have the same alias_idx 73N/A // but different offset and different instance_id. 1100N/A // Can not bypass initialization of the instance 1100N/A // We are looking for stored value, return Initialize node 1100N/A // or memory edge from Allocate node. 1100N/A // Otherwise skip it (the call updated 'mem' value). 584N/A assert(
false,
"Object is not scalar replaceable if a LoadStore node access its field");
73N/A// Given a Memory Phi, compute a value Phi containing the values from stores 73N/A// on the input paths. 73N/A// Note: this function is recursive; its depth is limited by the "level" argument 73N/A// Returns the computed Phi, or NULL if it cannot compute it. 247N/A // Check if an appropriate value phi already exists. 247N/A // Check if an appropriate new value phi already exists. 247N/A // create a new Phi for the value 73N/A // hit a sentinel, return appropriate 0 value 73N/A return NULL;
// can't find a value on this path 584N/A assert(
false,
"Object is not scalar replaceable if a LoadStore node access its field");
73N/A// Search the last value stored into the object's field. 73N/A done =
true;
// hit a sentinel, return appropriate 0 value 73N/A // try to find a phi's unique input 73N/A // hit a sentinel, return appropriate 0 value 73N/A // attempt to produce a Phi reflecting the values on the input paths of the Phi 73N/A // Something went wrong. 73N/A// Check the possibility of scalar replacement. 73N/A // Scan the uses of the allocation to check for anything that would 73N/A // prevent us from eliminating it. 73N/A // All users were eliminated. 73N/A // Object is passed as argument. 73N/A// Do scalar replacement. 73N/A if (
res !=
NULL) {
// Could be NULL when there are no users 73N/A // find the fields of the class which will be needed for safepoint debug information 73N/A // find the array's elements which will be needed for safepoint debug information 73N/A // Process the safepoint uses 73N/A // Scan object's fields adding an input to the safepoint for each field. 73N/A // The next code is taken from Parse::do_get_xxx(). 73N/A // This can happen if the constant oop is non-perm. 73N/A // Do not "join" in the previous type; it doesn't add value, 73N/A // and may yield a vacuous result if the field is of interface type. 2958N/A // We weren't able to find a value for this field, 2958N/A // give up on eliminating this allocation. 2958N/A // Remove any extra entries we added to the safepoint. 73N/A for (
int k = 0; k < j; k++) {
73N/A // rollback processed safepoints 73N/A // remove any extra entries we added to the safepoint 73N/A // Now make a pass over the debug information replacing any references 73N/A // to SafePointScalarObjectNode with the allocated object. 73N/A tty->
print(
"=== At SafePoint node %d can't find value of Field: ",
73N/A }
else {
// Array's element 73N/A tty->
print(
"=== At SafePoint node %d can't find value of array element [%d]",
124N/A // Enable "DecodeN(EncodeP(Allocate)) --> Allocate" transformation 124N/A // to be able scalar replace the allocation. 73N/A // Now make a pass over the debug information replacing any references 73N/A // to the allocated object with "sobj" 73N/A// Process users of eliminated allocation. 1100N/A // Verify that there is no dependent MemBarVolatile nodes, 1100N/A // they should be removed during IGVN, see MemBarNode::Ideal(). 1100N/A "MemBarVolatile should be eliminated for non-escaping object");
73N/A // Process other users of allocation's projections 73N/A // Eliminate Initialize node. 73N/A // raw memory addresses used only by the initialization 73N/A assert(
false,
"only Initialize or AddP expected");
0N/A//---------------------------set_eden_pointers------------------------- 0N/A if (
UseTLAB) {
// Private allocation: load from TLS 0N/A }
else {
// Shared allocation: load from globals 0N/A//============================================================================= 0N/A// A L L O C A T I O N 0N/A// Allocation attempts to be fast in the case of frequent small objects. 0N/A// It breaks down like this: 0N/A// 1) Size in doublewords is computed. This is a constant for objects and 0N/A// variable for most arrays. Doubleword units are used to avoid size 0N/A// overflow of huge doubleword arrays. We need doublewords in the end for 0N/A// 2) Size is checked for being 'too large'. Too-large allocations will go 0N/A// the slow path into the VM. The slow path can throw any required 0N/A// exceptions, and does all the special checks for very large arrays. The 0N/A// size test can constant-fold away for objects. For objects with 0N/A// finalizers it constant-folds the otherway: you always go slow with 0N/A// 3) If NOT using TLABs, this is the contended loop-back point. 0N/A// Load-Locked the heap top. If using TLABs normal-load the heap top. 0N/A// 4) Check that heap top + size*8 < max. If we fail go the slow ` route. 0N/A// NOTE: "top+size*8" cannot wrap the 4Gig line! Here's why: for largish 0N/A// "size*8" we always enter the VM, where "largish" is a constant picked small 0N/A// enough that there's always space between the eden max and 4Gig (old space is 0N/A// there so it's quite large) and large enough that the cost of entering the VM 0N/A// is dwarfed by the cost to initialize the space. 0N/A// 5) If NOT using TLABs, Store-Conditional the adjusted heap top back 0N/A// down. If contended, repeat at step 3. If using TLABs normal-store 0N/A// adjusted heap top back down; there is no contention. 0N/A// 6) If !ZeroTLAB then Bulk-clear the object/array. Fill in klass & mark 0N/A// 7) Merge with the slow-path; cast the raw memory pointer to the correct 0N/A//============================================================================= 0N/A// FastAllocateSizeLimit value is in DOUBLEWORDS. 
0N/A// Allocations bigger than this always go the slow route. 0N/A// This value must be small enough that allocation attempts that need to 0N/A// trigger exceptions go the slow route. Also, it must be small enough so 0N/A// that heap_top + size_in_bytes does not wrap around the 4Gig limit. 0N/A//=============================================================================j// 0N/A// The allocator will coalesce int->oop copies away. See comment in 0N/A// code shape produced here, so if you are changing this code shape 0N/A// make sure the GC info for the heap-top is correct in and around the 3043N/A // Break this link that is no longer useful and confuses register allocation 0N/A // We need a Region and corresponding Phi's to merge the slow-path and fast-path results. 0N/A // they will not be used if "always_slow" is set 0N/A // The initial slow comparison is a size check, the comparison 0N/A // we want to do is a BoolTest::gt 0N/A // Force slow-path allocation 0N/A // generate the initial test if necessary 0N/A // Now make the initial failure test. Usually a too-big test but 0N/A // might be a TRUE for finalizers or a fancy class check for 0N/A // Plug the failing-too-big test into the slow-path region 0N/A }
else {
// No initial test, just fall into next case 0N/A // generate the fast allocation code unless we know that the initial test will always go slow 565N/A // Fast path modifies only raw memory. 342N/A // Load Eden::end. Loop invariant and hoisted. 342N/A // Note: We set the control input on "eden_end" and "old_eden_top" when using 342N/A // a TLAB to work around a bug where these values were being moved across 342N/A // a safepoint. These are not oops, so they cannot be include in the oop 1988N/A // map, but they can be changed by a GC. The proper way to fix this would 342N/A // be to set the raw memory state when generating a SafepointNode. However 342N/A // this will require extensive changes to the loop optimization in order to 342N/A // prevent a degradation of the optimization. 0N/A // allocate the Region and Phi nodes for the result 0N/A // We need a Region for the loop-back contended case. 0N/A // Now handle the passing-too-big test. We fall into the contended 0N/A // loop-back merge point. 0N/A // Load(-locked) the heap top. 0N/A // See note above concerning the control input when using a TLAB 0N/A // Add to heap top to get a new heap top 0N/A // Check for needing a GC; compare against heap end 0N/A // Plug the failing-heap-space-need-gc test into the slow-path region 0N/A // This completes all paths into the slow merge point 0N/A }
else {
// No initial slow path needed! 0N/A // Just fall from the need-GC path straight into the VM call. 0N/A // No need for a GC. Setup for the Store-Conditional 0N/A // Grab regular I/O before optional prefetch may change it. 0N/A // Slow-path does no I/O so just set it to the original I/O. 1988N/A // Name successful fast-path variables 0N/A // Store (-conditional) the modified eden top back down. 0N/A // StorePConditional produces flags for a test PLUS a modified raw 0N/A // If not using TLABs, check to see if there was contention. 0N/A // If contention, loopback and try again. 0N/A // Fast-path succeeded with no contention! 1988N/A // Bump total allocated bytes for this thread 3043N/A // If initialization is performed by an array copy, any required 3043N/A // MemBarStoreStore was already added. If the object does not 3043N/A // escape no need for a MemBarStoreStore. Otherwise we need a 3043N/A // MemBarStoreStore so that stores that initialize this object 3043N/A // can't be reordered with a subsequent store that makes this 3043N/A // object accessible by other threads. 3043N/A // No InitializeNode or no stores captured by zeroing 3043N/A // elimination. Simply add the MemBarStoreStore after object 3043N/A // Add the MemBarStoreStore after the InitializeNode so that 3043N/A // all stores performing the initialization that were moved 3043N/A // before the InitializeNode happen before the storestore 3043N/A // The MemBarStoreStore depends on control and memory coming 3043N/A // All nodes that depended on the InitializeNode for control 3043N/A // and memory must now depend on the MemBarNode that itself 3043N/A // depends on the InitializeNode 0N/A "dtrace_object_alloc",
0N/A // Get base of thread-local storage area 0N/A // Plug in the successful fast-path into the result merge point 0N/A // Generate slow-path call 0N/A // Copy debug information and adjust JVMState information, then replace 0N/A // allocate node with the call 3047N/A // Hook i_o projection to avoid its elimination during allocation 3047N/A // replacement (when only a slow call is generated). 0N/A // Identify the output projections from the allocate node and 0N/A // adjust any references to them. 0N/A // The control and io projections look like: 0N/A // v---Proj(ctrl) <-----+ v---CatchProj(ctrl) 0N/A // ^---Proj(io) <-------+ ^---CatchProj(io) 0N/A // We are interested in the CatchProj nodes. 3047N/A // An allocate node has separate memory projections for the uses on 3047N/A // the control and i_o paths. Replace the control memory projection with 3047N/A // result_phi_rawmem (unless we are only generating a slow call when 3047N/A // both memory projections are combined) 3047N/A // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete 3047N/A // _memproj_catchall so we end up with a call that has only 1 memory projection. 3047N/A // An allocate node has separate i_o projections for the uses on the control 3047N/A // and i_o paths. Always replace the control i_o projection with result i_o 3047N/A // otherwise incoming i_o become dead when only a slow call is generated 3047N/A // (it is different from memory projections where both projections are 3047N/A // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete 3047N/A // _ioproj_catchall so we end up with a call that has only 1 i_o projection. 0N/A // if we generated only a slow call, we are done 3049N/A // Case of new array with negative size known during compilation. 3049N/A // AllocateArrayNode::Ideal() optimization disconnect unreachable 3049N/A // following code since call to runtime will throw exception. 3049N/A // As result there will be no users of i_o after the call. 
3049N/A // Leave i_o attached to this call to avoid problems in preceding graph. 0N/A // no uses of the allocation result 0N/A // Plug slow-path into result merge point 0N/A // This completes all paths into the result merge point 0N/A// Helper for PhaseMacroExpand::expand_allocate_common. 0N/A// Initializes the newly-allocated storage. 0N/A // Store the klass & mark bits 0N/A // For now only enable fast locking for non-array types 0N/A // conservatively small header size: 0N/A // Clear the object body, if necessary. 0N/A // The init has somehow disappeared; be cautious and clear everything. 0N/A // This can happen if a node is allocated but an uncommon trap occurs 0N/A // immediately. In this case, the Initialize gets associated with the 0N/A // trap, and may be placed in a different (outer) loop, if the Allocate 0N/A // is in a loop. If (this is rare) the inner loop gets unrolled, then 0N/A // there can be two Allocates to one Initialize. The answer in all these 0N/A // edge cases is safety first. It is always safe to clear immediately 0N/A // within an Allocate, and then (maybe or maybe not) clear some more later. 0N/A // Try to win by zeroing only what the init does not store. 0N/A // We can also try to do some peephole optimizations, 0N/A // such as combining some adjacent subword stores. 0N/A // We have no more use for this link, since the AllocateNode goes away: 0N/A // (If we keep the link, it just confuses the register allocator, 0N/A // who thinks he sees a real use of the address by the membar.) 0N/A// Generate prefetch instructions for next allocations. 0N/A // Generate prefetch allocation with watermark check. 0N/A // As an allocation hits the watermark, we will prefetch starting 0N/A // at a "distance" away from watermark. 0N/A // I/O is used for Prefetch 0N/A // check against new_eden_top 0N/A // true node, add prefetchdistance 0N/A // adding prefetches 2679N/A // Insert a prefetch for each allocation. 2679N/A // This code is used for Sparc with BIS. 
2679N/A // Generate several prefetch instructions. 0N/A // Insert a prefetch for each allocation only on the fast-path 2679N/A // Generate several prefetch instructions. 0N/A // Do not let it float too high, since if eden_top == eden_end, 0N/A // both might be null. 0N/A if( i == 0 ) {
// Set control for first prefetch, next follows it 2797N/A // Don't zero type array during slow allocation in VM since 2797N/A // it will be initialized later by arraycopy in compiled code. 3057N/A//-------------------mark_eliminated_box---------------------------------- 2579N/A// During EA obj may point to several objects but after few ideal graph 2579N/A// transformations (CCP) it may point to only one non escaping object 2579N/A// (but still using phi), corresponding locks and unlocks will be marked 2579N/A// for elimination. Later obj could be replaced with a new node (new phi) 2579N/A// and which does not have escape information. And later after some graph 2579N/A// reshape other locks and unlocks (which were not marked for elimination 2579N/A// before) are connected to this new obj (phi) but they still will not be 2579N/A// marked for elimination since new obj has no escape information. 2579N/A// Mark all associated (same box and obj) lock and unlock nodes for 2579N/A// elimination if some of them marked already. 3057N/A // Box is used only in one lock region. Mark this box as eliminated. 3057N/A // Check lock's box since box could be referenced by Lock's debug info. 3057N/A // Mark eliminated all related locks and unlocks. 3057N/A // Create new "eliminated" BoxLock node and use it in monitor debug info 3057N/A // instead of oldbox for the same object. 3057N/A // Note: BoxLock node is marked eliminated only here and it is used 3057N/A // to indicate that all associated lock and unlock nodes are marked 3057N/A // Replace old box node with new box for all users of the same object. 3057N/A // Replace Box and mark eliminated all related locks and unlocks. 3057N/A // Replace old box in monitor debug info. 3057N/A//-----------------------mark_eliminated_locking_nodes----------------------- 3057N/A // Only Lock node has JVMState needed here. 3057N/A // Mark eliminated related nested locks and unlocks. 
2579N/A // Note: BoxLock node is marked eliminated only here 2579N/A // and it is used to indicate that all associated lock 2579N/A // and unlock nodes are marked for elimination. 3057N/A // Verify that this Box is referenced only by related locks. 3057N/A // Mark all related locks and unlocks. 3057N/A // Process locks for non escaping object 3057N/A // Look for all locks of this object and mark them and 3057N/A // corresponding BoxLock nodes as eliminated. 3057N/A // Replace old box node with new eliminated box for all users 3057N/A // of the same object and mark related locks as eliminated. 2579N/A// eliminate the node without expanding it. 2579N/A// eliminated. This should be investigated as a future enhancement. 2579N/A // Check that new "eliminated" BoxLock node is created. 66N/A // There are 2 projections from the lock. The lock node will 66N/A // be deleted when its last use is subsumed below. 0N/A // The input to a Lock is merged memory, so extract its RawMem input 0N/A // (unless the MergeMem has been optimized away.) 2674N/A // Seach for MemBarAcquireLock node and delete it also. 460N/A // Delete FastLock node also if this Lock node is unique user 460N/A // (a loop peeling may clone a Lock node). 2674N/A // Seach for MemBarReleaseLock node and delete it also. 0N/A//------------------------------expand_lock_node---------------------- 0N/A // Make the merge point 605N/A * See the full description in MacroAssembler::biased_locking_enter(). 420N/A * if( (mark_word & biased_lock_mask) == biased_lock_pattern ) { 420N/A * // The object is biased. 420N/A * proto_node = klass->prototype_header; 420N/A * o_node = thread | proto_node; 420N/A * x_node = o_node ^ mark_word; 420N/A * if( (x_node & ~age_mask) == 0 ) { // Biased to the current thread ? 420N/A * if( (x_node & biased_lock_mask) != 0 ) { 420N/A * // The klass's prototype header is no longer biased. 420N/A * cas(&mark_word, mark_word, proto_node) 420N/A * // The klass's prototype header is still biased. 
420N/A * if( (x_node & epoch_mask) != 0 ) { // Expired epoch? 420N/A * // Different thread or anonymous biased. 420N/A * old = mark_word & (epoch_mask | age_mask | biased_lock_mask); 420N/A * if( cas(&mark_word, old, new) == 0 ) { 420N/A * goto slow_path; // Failed. 420N/A * // The object is not biased. 420N/A * if( FastLock(obj) == 0 ) { 420N/A * OptoRuntime::complete_monitor_locking_Java(obj); 420N/A // create a Phi for the memory state 420N/A // First, check mark word for the biased lock pattern. 420N/A // Get fast path - mark word has the biased lock pattern. 420N/A // fast_lock_region->in(1) is set to slow path. 420N/A // Now check that the lock is biased to the current thread and has 420N/A // the same epoch and bias as Klass::_prototype_header. 420N/A // Special-case a fresh allocation to avoid building nodes: 420N/A // Get slow path - mark word does NOT match the value. 420N/A // region->in(3) is set to fast path - the object is biased to the current thread. 420N/A // Mark word does NOT match the value (thread | Klass::_prototype_header). 420N/A // First, check biased pattern. 420N/A // Get fast path - _prototype_header has the same biased lock pattern. 420N/A // fast_lock_region->in(2) - the prototype header is no longer biased 420N/A // and we have to revoke the bias on this object. 420N/A // We are going to try to reset the mark of this object to the prototype 420N/A // value and fall through to the CAS-based locking scheme. 420N/A // Second, check epoch bits. 420N/A // Get slow path - mark word does NOT match epoch bits. 420N/A // The epoch of the current bias is not valid, attempt to rebias the object 420N/A // toward the current thread. 420N/A // rebiased_region->in(1) is set to fast path. 420N/A // The epoch of the current bias is still valid but we know 420N/A // nothing about the owner; it might be set or it might be clear. 420N/A // Try to acquire the bias of the object using an atomic operation. 
420N/A // If this fails we will go in to the runtime to revoke the object's bias. 420N/A // Get slow path - Failed to CAS. 420N/A // region->in(4) is set to fast path - the object is rebiased to the current thread. 420N/A // Call CAS-based locking scheme (FastLock node). 420N/A // Get slow path - FastLock failed to lock the object. 420N/A // region->in(2) is set to fast path - the object is locked to the current thread. 420N/A // Reset lock's memory edge. 420N/A // create a Phi for the memory state 420N/A // Optimize test; set region slot 2 0N/A // Make slow path call 0N/A // Slow path can only throw asynchronous exceptions, which are always 0N/A // de-opted. So the compiler thinks the slow-call can never throw an 0N/A // exception. If it DOES throw an exception we would need the debug 0N/A // info removed first (since if it throws there is no monitor). 0N/A // Capture slow path 0N/A // disconnect fall-through projection from call and create a new one 0N/A // hook up users of fall-through projection to region 0N/A // region inputs are now complete 0N/A//------------------------------expand_unlock_node---------------------- 0N/A // No need for a null check on unlock 0N/A // Make the merge point 420N/A // Check for biased locking unlock case, which is a no-op. 605N/A // See the full description in MacroAssembler::biased_locking_exit(). 420N/A // create a Phi for the memory state 420N/A // create a Phi for the memory state 0N/A // Optimize test; set region slot 2 0N/A // No exceptions for unlocking 0N/A // Capture slow path 0N/A // disconnect fall-through projection from call and create a new one 0N/A // hook up users of fall-through projection to region 0N/A // region inputs are now complete 2958N/A//---------------------------eliminate_macro_nodes---------------------- 2958N/A// Eliminate scalar replaced allocations and associated locks. 
460N/A // First, attempt to eliminate locks 2579N/A // Before elimination mark all associated (same box and obj) 460N/A // Next, attempt to eliminate allocations 2958N/A//------------------------------expand_macro_nodes---------------------- 2958N/A// Returns true if a failure occurred. 2958N/A // Last attempt to eliminate macro nodes. 73N/A // Make sure expansion will not cause node limit to be exceeded. 73N/A // Worst case is a macro node gets expanded into about 50 nodes. 73N/A // Allow 50% more for optimization. 2958N/A // Eliminate Opaque and LoopLimit nodes. Do it after all loop optimizations. 2958N/A // Remove it from macro list and put on IGVN worklist to optimize. 0N/A // expand "macro" nodes 0N/A // nodes are removed from the macro list as they are processed 0N/A // node is unreachable, so don't try to expand it 0N/A assert(
false,
"unknown node type in macro list");