macro.cpp revision 216
2371N/A * Copyright 2005-2007 Sun Microsystems, Inc. All Rights Reserved. 0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 0N/A * This code is free software; you can redistribute it and/or modify it 0N/A * under the terms of the GNU General Public License version 2 only, as 0N/A * published by the Free Software Foundation. 0N/A * This code is distributed in the hope that it will be useful, but WITHOUT 0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 0N/A * version 2 for more details (a copy is included in the LICENSE file that 0N/A * accompanied this code). 0N/A * You should have received a copy of the GNU General Public License version 0N/A * 2 along with this work; if not, write to the Free Software Foundation, 0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1472N/A * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1472N/A * CA 95054 USA or visit www.sun.com if you need additional information or 1879N/A#
include "incls/_precompiled.incl" 1879N/A// Replace any references to "oldref" in inputs to "use" with "newref". 0N/A// Returns the number of replacements made. 0N/A // Copy debug information and adjust JVMState information 0N/A // Clone old SafePointScalarObjectNodes, adjusting their field contents. 0N/A // Fast path taken; set region slot 2 113N/A // Fast path not-taken, i.e. slow path 113N/A//--------------------copy_predefined_input_for_runtime_call-------------------- 113N/A // Set fixed predefined input arguments 0N/A//------------------------------make_slow_call--------------------------------- 0N/A // Slow path call has no side-effects, uses few values 113N/A // For Control (fallthrough) and I_O (catch_all_index) we have CatchProj -> Catch -> Proj 2371N/A assert(
false,
"unexpected projection from allocation node.");
0N/A// Eliminate a card mark sequence. p2x is a ConvP2XNode // Search for a memory operation for the specified memory slice. return mem;
// hit one of our sentinals // we can safely skip over safepoints, calls, locks and membars because we // already know that the object is safe to eliminate. assert(
false,
"unexpected projection");
// Array elements references have the same alias_idx // but different offset and different instance_id. // Given a Memory Phi, compute a value Phi containing the values from stores // Note: this function is recursive, its depth is limied by the "level" argument // Returns the computed Phi, or NULL if it cannot compute it. // hit a sentinel, return appropriate 0 value return NULL;
// can't find a value on this path // Check if an appropriate node already exists. return NULL;
// unknown node on this path // create a new Phi for the value // Search the last value stored into the object's field. return NULL;
// found a loop, give up done =
true;
// hit a sentinel, return appropriate 0 value done =
true;
// Something go wrong. // try to find a phi's unique input assert(
false,
"unexpected node");
// hit a sentinel, return appropriate 0 value // attempt to produce a Phi reflecting the values on the input paths of the Phi // Check the possibility of scalar replacement. // Scan the uses of the allocation to check for anything that would // prevent us from eliminating it. // All users were eliminated. // Object is passed as argument. // Do scalar replacement. if (
res !=
NULL) {
// Could be NULL when there are no users // find the fields of the class which will be needed for safepoint debug information // find the array's elements which will be needed for safepoint debug information // Process the safepoint uses // Scan object's fields adding an input to the safepoint for each field. for (
int j = 0; j <
nfields; j++) {
// The next code is taken from Parse::do_get_xxx(). // This can happen if the constant oop is non-perm. // Do not "join" in the previous type; it doesn't add value, // and may yield a vacuous result if the field is of interface type. // we weren't able to find a value for this field, // give up on eliminating this allocation // remove any extra entries we added to the safepoint for (
int k = 0; k < j; k++) {
// rollback processed safepoints // remove any extra entries we added to the safepoint for (
int k = 0; k <
nfields; k++) {
// Now make a pass over the debug information replacing any references // to SafePointScalarObjectNode with the allocated object. tty->
print(
"=== At SafePoint node %d can't find value of Field: ",
}
else {
// Array's element tty->
print(
"=== At SafePoint node %d can't find value of array element [%d]",
tty->
print(
", which prevents elimination of: ");
// Enable "DecodeN(EncodeP(Allocate)) --> Allocate" transformation // to be able scalar replace the allocation. // Now make a pass over the debug information replacing any references // to the allocated object with "sobj" // Process users of eliminated allocation. assert(
res->
outcnt() == 0,
"all uses of allocated objects must be deleted");
// Process other users of allocation's projections // Eliminate Initialize node. // raw memory addresses used only by the initialization assert(
false,
"only Initialize or AddP expected");
//---------------------------set_eden_pointers------------------------- if (
UseTLAB) {
// Private allocation: load from TLS }
else {
// Shared allocation: load from globals//============================================================================= // Allocation attempts to be fast in the case of frequent small objects. // It breaks down like this: // 1) Size in doublewords is computed. This is a constant for objects and // variable for most arrays. Doubleword units are used to avoid size // overflow of huge doubleword arrays. We need doublewords in the end for // 2) Size is checked for being 'too large'. Too-large allocations will go // the slow path into the VM. The slow path can throw any required // exceptions, and does all the special checks for very large arrays. The // size test can constant-fold away for objects. For objects with // finalizers it constant-folds the otherway: you always go slow with // 3) If NOT using TLABs, this is the contended loop-back point. // Load-Locked the heap top. If using TLABs normal-load the heap top. // 4) Check that heap top + size*8 < max. If we fail go the slow ` route. // NOTE: "top+size*8" cannot wrap the 4Gig line! Here's why: for largish // "size*8" we always enter the VM, where "largish" is a constant picked small // enough that there's always space between the eden max and 4Gig (old space is // there so it's quite large) and large enough that the cost of entering the VM // is dwarfed by the cost to initialize the space. // 5) If NOT using TLABs, Store-Conditional the adjusted heap top back // down. If contended, repeat at step 3. If using TLABs normal-store // adjusted heap top back down; there is no contention. // 6) If !ZeroTLAB then Bulk-clear the object/array. Fill in klass & mark // 7) Merge with the slow-path; cast the raw memory pointer to the correct //============================================================================= // FastAllocateSizeLimit value is in DOUBLEWORDS. // Allocations bigger than this always go the slow route. // This value must be small enough that allocation attempts that need to // trigger exceptions go the slow route. Also, it must be small enough so // that heap_top + size_in_bytes does not wrap around the 4Gig limit. //=============================================================================j// // The allocator will coalesce int->oop copies away. See comment in // coalesce.cpp about how this works. It depends critically on the exact // code shape produced here, so if you are changing this code shape // make sure the GC info for the heap-top is correct in and around the Node*
length,
// array length for an array allocation // With escape analysis, the entire memory state was needed to be able to // eliminate the allocation. Since the allocations cannot be eliminated, // optimize it to the raw slice. // Load Eden::end. Loop invariant and hoisted. // Note: We set the control input on "eden_end" and "old_eden_top" when using // a TLAB to work around a bug where these values were being moved across // a safepoint. These are not oops, so they cannot be include in the oop // map, but the can be changed by a GC. The proper way to fix this would // be to set the raw memory state when generating a SafepointNode. However // this will require extensive changes to the loop optimization in order to // prevent a degradation of the optimization. // See comment in memnode.hpp, around line 227 in class LoadPNode. // We need a Region and corresponding Phi's to merge the slow-path and fast-path results. // they will not be used if "always_slow" is set // The initial slow comparison is a size check, the comparison // we want to do is a BoolTest::gt // Force slow-path allocation // generate the initial test if necessary // Now make the initial failure test. Usually a too-big test but // might be a TRUE for finalizers or a fancy class check for // Plug the failing-too-big test into the slow-path region }
else {
// No initial test, just fall into next case // generate the fast allocation code unless we know that the initial test will always go slow // allocate the Region and Phi nodes for the result // We need a Region for the loop-back contended case. // Now handle the passing-too-big test. We fall into the contended // loop-back merge point. // Load(-locked) the heap top. // See note above concerning the control input when using a TLAB // Add to heap top to get a new heap top // Check for needing a GC; compare against heap end // Plug the failing-heap-space-need-gc test into the slow-path region // This completes all paths into the slow merge point }
else {
// No initial slow path needed! // Just fall from the need-GC path straight into the VM call. // No need for a GC. Setup for the Store-Conditional // Grab regular I/O before optional prefetch may change it. // Slow-path does no I/O so just set it to the original I/O. // Store (-conditional) the modified eden top back down. // StorePConditional produces flags for a test PLUS a modified raw // If not using TLABs, check to see if there was contention. // If contention, loopback and try again. // Fast-path succeeded with no contention! // Rename successful fast-path variables to make meaning more obvious // Get base of thread-local storage area // Plug in the successful fast-path into the result merge point // Generate slow-path call // Copy debug information and adjust JVMState information, then replace // allocate node with the call // Identify the output projections from the allocate node and // adjust any references to them. // The control and io projections look like: // v---Proj(ctrl) <-----+ v---CatchProj(ctrl) // ^---Proj(io) <-------+ ^---CatchProj(io) // We are interested in the CatchProj nodes. // An allocate node has separate memory projections for the uses on the control and i_o paths // Replace uses of the control memory projection with result_phi_rawmem (unless we are only generating a slow call) // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete _memproj_catchall so // we end up with a call that has only 1 memory projection // An allocate node has separate i_o projections for the uses on the control and i_o paths // Replace uses of the control i_o projection with result_phi_i_o (unless we are only generating a slow call) // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete _ioproj_catchall so // we end up with a call that has only 1 control projection // if we generated only a slow call, we are done // no uses of the allocation result // Plug slow-path into result merge point // This completes all paths into the result merge point // Helper for PhaseMacroExpand::expand_allocate_common. // Initializes the newly-allocated storage. // Store the klass & mark bits // For now only enable fast locking for non-array types // conservatively small header size: if (k->
is_array_klass())
// we know the exact header size in most cases: // Clear the object body, if necessary. // The init has somehow disappeared; be cautious and clear everything. // This can happen if a node is allocated but an uncommon trap occurs // immediately. In this case, the Initialize gets associated with the // trap, and may be placed in a different (outer) loop, if the Allocate // is in a loop. If (this is rare) the inner loop gets unrolled, then // there can be two Allocates to one Initialize. The answer in all these // edge cases is safety first. It is always safe to clear immediately // within an Allocate, and then (maybe or maybe not) clear some more later. // Try to win by zeroing only what the init does not store. // We can also try to do some peephole optimizations, // such as combining some adjacent subword stores. // We have no more use for this link, since the AllocateNode goes away: // (If we keep the link, it just confuses the register allocator, // who thinks he sees a real use of the address by the membar.) // Generate prefetch instructions for next allocations. // Generate prefetch allocation with watermark check. // As an allocation hits the watermark, we will prefetch starting // at a "distance" away from watermark. // I/O is used for Prefetch // check against new_eden_top // true node, add prefetchdistance // Insert a prefetch for each allocation only on the fast-path // Generate several prefetch instructions only for arrays. // Do not let it float too high, since if eden_top == eden_end, if( i == 0 ) {
// Set control for first prefetch, next follows it // we have determined that this lock/unlock can be eliminated, we simply // eliminate the node without expanding it. // Note: The membar's associated with the lock/unlock are currently not // eliminated. This should be investigated as a future enhancement. // Mark the box lock as eliminated if all correspondent locks are eliminated // to construct correct debug info. // There are 2 projections from the lock. The lock node will // be deleted when its last use is subsumed below. // The memory projection from a lock/unlock is RawMem // The input to a Lock is merged memory, so extract its RawMem input // (unless the MergeMem has been optimized away.) // Seach for MemBarAcquire node and delete it also. // Seach for MemBarRelease node and delete it also. //------------------------------expand_lock_node---------------------- // Optimize test; set region slot 2 // Slow path can only throw asynchronous exceptions, which are always // de-opted. So the compiler thinks the slow-call can never throw an // exception. If it DOES throw an exception we would need the debug // info removed first (since if it throws there is no monitor). // disconnect fall-through projection from call and create a new one // hook up users of fall-through projection to region // region inputs are now complete // create a Phi for the memory state //------------------------------expand_unlock_node---------------------- // No need for a null check on unlock // Optimize test; set region slot 2 // No exceptions for unlocking // disconnect fall-through projection from call and create a new one // hook up users of fall-through projection to region // region inputs are now complete // create a Phi for the memory state //------------------------------expand_macro_nodes---------------------- // Returns true if a failure occurred. // attempt to eliminate allocations assert(
false,
"unknown node type in macro list");
// Make sure expansion will not cause node limit to be exceeded. // Worst case is a macro node gets expanded into about 50 nodes. // Allow 50% more for optimization. // nodes are removed from the macro list as they are processed // node is unreachable, so don't try to expand it assert(
false,
"unknown node type in macro list");