escape.cpp revision 1062
2273N/A * Copyright 2005-2009 Sun Microsystems, Inc. All Rights Reserved. 0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 0N/A * This code is free software; you can redistribute it and/or modify it 0N/A * under the terms of the GNU General Public License version 2 only, as 0N/A * published by the Free Software Foundation. 0N/A * This code is distributed in the hope that it will be useful, but WITHOUT 0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 0N/A * version 2 for more details (a copy is included in the LICENSE file that 0N/A * accompanied this code). 0N/A * You should have received a copy of the GNU General Public License version 0N/A * 2 along with this work; if not, write to the Free Software Foundation, 0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1472N/A * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1472N/A * CA 95054 USA or visit www.sun.com if you need additional information or 1879N/A#
include "incls/_precompiled.incl" 0N/A "P",
// PointsToEdge 0N/A "D",
// DeferredEdge 0N/A // Add ConP(#NULL) and ConN(#NULL) nodes. 0N/A // don't add a self-referential edge, this can occur during removal of 0N/A // We are computing a raw address for a store captured by an Initialize 0N/A // compute an appropriate address type. AddP cases #3 and #5 (see below). 0N/A "offset must be a constant or it is initialization of array");
1909N/A // inline set_escape_state(idx, es); 0N/A // If we are still collecting or there were no non-escaping allocations 0N/A // we don't know the answer yet 0N/A // if the node was created after the escape computation, return 0N/A // if we have already computed a value, return it 0N/A // PointsTo() calls n->uncast() which can return a new ideal node. 0N/A // compute max escape state of anything this node could point to 1977N/A // cache the computed escape state 0N/A // If we have a JavaObject, return just that object 1879N/A // ensure that all inputs of a Phi have been processed assert(
false,
"neither PointsToEdge or DeferredEdge");
// no deferred or pointsto edges found. Assume the value was set // outside this method. Add the phantom object to the pointsto set. // This method is most expensive during ConnectionGraph construction. // Reuse vectorSet and an additional growable array for deferred edges. // Mark current edges as visited and move deferred edges to separate array. // Special case - field set outside (globally escaping). assert(
false,
"invalid connection graph");
// Add an edge to node given by "to_i" from any field of adr_i whose offset // matches "offset" A deferred edge is added if to_i is a LocalVar, and // a pointsto edge is added if it is a JavaObject // Add a deferred edge from node given by "from_i" to any field of adr_i // whose offset matches "offset". // we have not seen any stores to this field, assume it was set outside this method // AddP cases for Base and Address inputs: // case #1. Direct object's field reference: // Proj #5 ( oop result ) // CheckCastPP (cast to instance type) // AddP ( base == address ) // case #2. Indirect object's field reference: // CastPP (cast to instance type) // AddP ( base == address ) // case #3. Raw object's field reference for Initialize node: // Proj #5 ( oop result ) // case #4. Array's element reference: // {CheckCastPP | CastPP} // | AddP ( array's element offset ) // AddP ( array's offset ) // case #5. Raw object's field reference for arraycopy stub call: // The inline_native_clone() case when the arraycopy stub is called // after the allocation before Initialize and CheckCastPP nodes. // Proj #5 ( oop result ) // AddP ( base == address ) // case #6. Constant Pool, ThreadLocal, CastX2P or // Raw object's field reference: // {ConP, ThreadLocal, CastX2P, raw Load} // case #7. Klass's field reference. // AddP ( base == address ) // case #8. narrow Klass's field reference. // AddP ( base == address ) // Case #6 (unsafe access) may have several chained AddP nodes. // Find array's offset to push it on worklist first and // as result process an array's element offset first (pushed second) // to avoid CastPP for the array's offset. // Otherwise the inserted CastPP (LocalVar) will point to what // the AddP (Field) points to. Which would be wrong since // the algorithm expects the CastPP has the same point as // as AddP's base CheckCastPP (LocalVar). 
// memProj (from ArrayAllocation CheckCastPP) // | || Int (element index) // | || | ConI (log(element size)) // | AddP (array's element offset) // | | ConI (array's offset: #12(32-bits) or #24(64-bits)) // Load/Store (memory operation on array's element) // Adjust the type and inputs of an AddP which computes the // address of a field of an instance // We are computing a raw address for a store captured by an Initialize // compute an appropriate address type (cases #3 and #5). "old type must be non-instance or match new type");
// The type 't' could be a subclass of 'base_t'. // As a result t->offset() could be larger than base_t's size and it will // cause the failure in add_offset() with narrow oops since TypeOopPtr() // constructor verifies correctness of the offset. // It could happen on a subclass's branch (from the type profiling // inlining) which was not eliminated during parsing since the exactness // of the allocation type was not propagated to the subclass type check. // Or the type 't' could be not related to 'base_t' at all. // It could happen when CHA type is different from MDO type on a dead path // (for example, from instanceof check) which is not collapsed during parsing. // Do nothing for such AddP node and don't process its users since // this code branch will go away. return false;
// bail out // Do NOT remove the next line: ensure a new alias index is allocated // for the instance type. Note: C++ will not remove it since the call // record the allocation in the node map // Set addp's Base and Address to 'base'. // Skip AddP cases #3 and #5. // AddP case #4 (adr is array's element offset AddP node) // Put on IGVN worklist since at least addp's type was changed above. // Create a new version of orig_phi if necessary. Returns either the newly // created phi or an existing phi. Sets create_new to indicate whether a new // phi was created. Cache the last newly created phi in the node map. // nothing to do if orig_phi is bottom memory or matches alias_idx // Have we recently created a Phi for this alias index? // Previous check may fail when the same wide memory Phi was split into Phis // for different memory slices. Search all Phis for this region. // Retry compilation without escape analysis. // If this is the first failure, the sentinel string will "stick" // to the Compile object, and the C2Compiler will see it and retry. // Return a new version of Memory Phi "orig_phi" with the inputs having the // specified alias index. // found a phi for which we created a new split, push current one on worklist and begin // verify that the new Phi has an input for each input of the original // Check if all new phi's inputs have specified alias index. // Otherwise use old phi. // we have finished processing a Phi, see if there are any more to do // The next methods are derived from methods in MemNode. // TypeInstPtr::NOTNULL+any is an OOP with unknown offset - generally // means an array I have not precisely typed yet. Do not do any // alias stuff with it any time soon. // Update input if it is progress over what we have now // Search memory chain of "mem" to find a MemNode whose address // is the specified alias index. break;
// hit one of our sentinels continue;
// don't search further for non-instance types // skip over a call which does not affect this memory slice break;
// hit one of our sentinels // Stop if this is the initialization for the object instance // which contains this memory slice, otherwise skip over it. // Didn't find instance memory, search through general slice recursively. assert(
idx !=
alias_idx,
"Object is not scalar replaceable if a LoadStore node access its field");
// Create a new Phi with the specified alias index type. // Push all non-instance Phis on the orig_phis worklist to update inputs // during Phase 4 if needed. // the result is either MemNode, PhiNode, InitializeNode. // Convert the types of unescaped object to instance types where possible, // propagate the new type information through the graph, and update memory // edges and MergeMem inputs to reflect the new type. // We start with allocations (and calls which may be allocations) on alloc_worklist. // The processing is done in 4 phases: // Phase 1: Process possible allocations from alloc_worklist. Create instance // types for the CheckCastPP for allocations where possible. // Propagate the the new types through users as follows: // casts and Phi: push users on alloc_worklist // AddP: cast Base and Address inputs to the instance type // push any AddP users on alloc_worklist and push any memnode // users onto memnode_worklist. // Phase 2: Process MemNode's from memnode_worklist. compute new address type and // search the Memory chain for a store with the appropriate type // address type. If a Phi is found, create a new version with // the appropriate memory slices from each of the Phi inputs. // For stores, process the users as follows: // MemNode: push on memnode_worklist // MergeMem: push on mergemem_worklist // Phase 3: Process MergeMem nodes from mergemem_worklist. Walk each memory slice // moving the first node encountered of each instance type to the // the input corresponding to its alias index. // appropriate memory slice. // Phase 4: Update the inputs of non-instance memory Phis and the Memory input of memnodes. // In the following example, the CheckCastPP nodes are the cast of allocation // results and the allocation of node 29 is unescaped and eligible to be an // 20 AddP _ 19 19 10 Foo+12 alias_index=4 // 30 AddP _ 29 29 10 Foo+12 alias_index=4 // 40 StoreP 25 7 20 ... alias_index=4 // 50 StoreP 35 40 30 ... alias_index=4 // 60 StoreP 45 50 20 ... 
alias_index=4 // 70 LoadP _ 60 30 ... alias_index=4 // 80 Phi 75 50 60 Memory alias_index=4 // 90 LoadP _ 80 30 ... alias_index=4 // 100 LoadP _ 80 20 ... alias_index=4 // Phase 1 creates an instance type for node 29 assigning it an instance id of 24 // and creating a new alias index for node 30. This gives: // 20 AddP _ 19 19 10 Foo+12 alias_index=4 // 29 CheckCastPP "Foo" iid=24 // 30 AddP _ 29 29 10 Foo+12 alias_index=6 iid=24 // 40 StoreP 25 7 20 ... alias_index=4 // 50 StoreP 35 40 30 ... alias_index=6 // 60 StoreP 45 50 20 ... alias_index=4 // 70 LoadP _ 60 30 ... alias_index=6 // 80 Phi 75 50 60 Memory alias_index=4 // 90 LoadP _ 80 30 ... alias_index=6 // 100 LoadP _ 80 20 ... alias_index=4 // In phase 2, new memory inputs are computed for the loads and stores, // And a new version of the phi is created. In phase 4, the inputs to // node 80 are updated and then the memory nodes are updated with the // values computed in phase 2. This results in: // 20 AddP _ 19 19 10 Foo+12 alias_index=4 // 29 CheckCastPP "Foo" iid=24 // 30 AddP _ 29 29 10 Foo+12 alias_index=6 iid=24 // 40 StoreP 25 7 20 ... alias_index=4 // 50 StoreP 35 7 30 ... alias_index=6 // 60 StoreP 45 40 20 ... alias_index=4 // 70 LoadP _ 50 30 ... alias_index=6 // 80 Phi 75 40 60 Memory alias_index=4 // 120 Phi 75 50 50 Memory alias_index=6 // 90 LoadP _ 120 30 ... alias_index=6 // 100 LoadP _ 80 20 ... alias_index=4 // Phase 1: Process possible allocations from alloc_worklist. // Create instance types for the CheckCastPP for allocations where possible. // (Note: don't forget to change the order of the second AddP node on // the alloc_worklist if the order of the worklist processing is changed, // see the comment in find_second_addp().) // copy escape information to call node // We have an allocation or call which returns a Java object, // see if it is unescaped. // Find CheckCastPP for the allocate or for the return value of a call if (n ==
NULL) {
// No uses except Initialize node // Set the scalar_replaceable flag for allocation // so it could be eliminated if it has no uses. // The inline code for Object.clone() casts the allocation result to // java.lang.Object and then to the actual type of the allocated // object. Detect this case and use the second cast. // Also detect j.l.reflect.Array.newInstance(jobject, jint) case when // the allocation result is cast to java.lang.Object and then // to the actual Array type. // Non-scalar replaceable if the allocation type is unknown statically // (reflection allocation), the object can't be restored during // deoptimization without precise type. // Set the scalar_replaceable flag for allocation // so it could be eliminated. // in order for an object to be scalar-replaceable, it must be: // - a direct allocation (not a call returning an object) // - eligible to be a unique type // - not determined to be ineligible by escape analysis continue;
// not a TypeInstPtr // First, put on the worklist all Field edges from Connection Graph // which is more accurate than putting immediate users from Ideal Graph. "only AddP nodes are Field edges in CG");
if (
use->
outcnt() > 0) {
// Don't process dead nodes // An allocation may have an Initialize which has raw stores. Scan // the users of the raw allocation result and push AddP users continue;
// Assume the value was set outside this method. continue;
// already processed continue;
// Assume the value was set outside this method. // push users on appropriate worklist // Look for MergeMem nodes for calls which reference unique allocation // (through CheckCastPP nodes) even for debug info. // New alias types were created in split_AddP(). // Phase 2: Process MemNode's from memnode_worklist. compute new address type and // compute new values for Memory inputs (the Memory inputs are not // actually updated until phase 4.) // we don't need to do anything, but the users must be pushed if we haven't processed // we don't need to do anything, but the users of the memory projection must be pushed continue;
// don't push users // get the memory projection // push user on appropriate worklist // Phase 3: Process MergeMem nodes from mergemem_worklist. // Walk each memory moving the first node encountered of each // instance type to the input corresponding to its alias index. // Note: we don't want to use MergeMemStream here because we only want to // scan inputs which exist at the start, not ones we add during processing. // Find any instance of the current type if we haven't encountered // a value of the instance along the chain. // Find the rest of instances values // Didn't find instance memory, search through general slice recursively. // Propagate new memory slices to following MergeMem nodes. // Phase 4: Update the inputs of non-instance memory Phis and // the Memory input of memnodes // First update the inputs of any non-instance Phi's from // which we split out an instance Phi. Note we don't have // to recursively process Phi's encountered on the input memory // chains as is done in split_memory_phi() since they will // also be processed here. // Update the memory inputs of MemNodes with the value we computed // EA brings benefits only when the code has allocations and/or locks which // are represented by ideal Macro nodes. for(
int i=0; i <
cnt; i++ ) {
// 1. Populate Connection Graph (CG) with Ideal nodes. // Push all useful nodes onto CG list and set their type. // Only allocations and java static calls results are checked // for an escape status. See process_call_result() below. return false;
// Nothing to do. // 2. First pass to create simple CG edges (doesn't require to walk CG). // 3. Pass to create fields edges (Allocate -F-> AddP). // 4. Build Connection Graph which need // to walk the connection graph. if (n !=
NULL) {
// Call, AddP, LoadP, StoreP // 5. Remove deferred edges from the graph and collect // information needed for type splitting. // Search for objects which are not scalar replaceable. // Mark their escape state as ArgEscape to propagate the state // to referenced objects. // Note: currently there are no difference in compiler optimizations // for ArgEscape objects and NoEscape objects which are not // Check if a field's initializing value is recorded and add // a corresponding NULL field's value if it is not recorded. // Connection Graph does not record a default initialization by NULL // captured by Initialize node. // Note: it will disable scalar replacement in some cases: // Point p[] = new Point[1]; // p[0] = new Point(); // Will be not scalar replaced // but it will save us from incorrect optimizations in next cases: // Point p[] = new Point[1]; // if ( x ) p[0] = new Point(); // Will be not scalar replaced // Without a control flow analysis we can't distinguish above cases. // It does not matter if it is not Allocation node since // only non-escaping allocations are scalar replaced. // A field's initializing value was not recorded. Add NULL. // An object is not scalar replaceable if the field which may point // to it has unknown offset (unknown element of an array of objects). // Currently an object is not scalar replaceable if a LoadStore node // access its field since the field value is unknown after it. // An object is not scalar replaceable if the address points // to unknown field (unknown element for arrays, offset is OffsetBot). // Or the address may point to more then one object. This may produce // the false positive result (set scalar_replaceable to false) // since the flow-insensitive escape analysis can't separate // the case when stores overwrite the field's value from the case // when stores happened on different control branches. // 6. Propagate escape states. 
// push all GlobalEscape nodes on the worklist // mark all nodes reachable from GlobalEscape nodes // push all ArgEscape nodes on the worklist // mark all nodes reachable from ArgEscape nodes // push all NoEscape nodes on the worklist // mark all nodes reachable from NoEscape nodes // Push scalar replaceable allocations on alloc_worklist // for processing in split_unique_types(). // Now use the escape information to create unique types for // scalar replaceable objects. // Clean up after split unique types. tty->
print(
"=== No allocations eliminated for ");
tty->
print(
" since EliminateAllocations is off ===");
tty->
print(
" since there are no scalar replaceable candidates ===");
tty->
print(
" since AliasLevel < 3 ===");
assert(
false,
"should be done already");
// Stub calls, objects do not escape but they are not scalar replaceable. // Adjust escape state for outgoing arguments. // The inline_native_clone() case when the arraycopy stub is called // after the allocation before Initialize and CheckCastPP nodes. // Set AddP's base (Allocate) as not scalar replaceable since // pointer to the base (with offset) is passed as argument. // For a static call, we know exactly what method is being called. // Use bytecode estimator to record the call's escape effects // fall-through if not a Java method or no analyzer information // The argument globally escapes, mark everything it could point to // The argument itself doesn't escape, but any fields might // The argument globally escapes, mark everything it could point to // The argument itself doesn't escape, but any fields might // Fall-through here if not a Java method or no analyzer information // or some other type of call, assume the worst case: all arguments // adjust escape state for outgoing arguments // Also works for DecodeN(LoadNKlass). // Not scalar replaceable if the length is not constant or too big. // For a static call, we know exactly what method is being called. // Use bytecode estimator to record whether the call's return value escapes // Note: we use isa_ptr() instead of isa_oopptr() here because the // _multianewarray functions return a TypeRawPtr. break;
// doesn't return a pointer type // not a Java method, assume global escape // Returns a newly allocated unescaped object, simply // update dependency information. // Mark it as NoEscape so that objects referenced by // its fields will be marked as NoEscape at least. // determine whether any arguments are returned // Returns unknown object. // Some other type of call, assume the worst case that the // returned value, if any, globally escapes. // Note: we use isa_ptr() instead of isa_oopptr() here because the // _multianewarray functions return a TypeRawPtr. // Populate Connection Graph with Ideal nodes and create simple // connection graph edges (do not need to check the node_type of inputs // or to call PointsTo() to walk the connection graph). return;
// No need to redefine node's state. // Arguments to allocation and locking don't escape. // Put Lock and Unlock nodes on IGVN worklist to process them during // the first IGVN optimization when escape information is still available. // Have to process call's arguments first. // Check if a call returns an object. // Note: use isa_ptr() instead of isa_oopptr() here because // the _multianewarray functions return a TypeRawPtr. // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because // ThreadLocal has RawPtr type. {
// "Unsafe" memory access. // assume all pointer constants globally escape except for null // assume all narrow oop constants globally escape except for null // assume that all exception objects globally escape // We have to assume all input parameters globally escape // (Note: passing 'false' since _processed is already set). // nothing to do if not an oop or narrow oop for (i =
1; i < n->
req() ; i++) {
continue;
// ignore top or inputs which go back to this node // we are only interested in the result projection from a call // The call's result may need to be processed later if the call // returns its argument and the argument is not processed yet. // Treat Return value as LocalVar with GlobalEscape escape state. // We are computing a raw address for a store captured // by an Initialize compute an appropriate address type. // Don't set processed bit for AddP, LoadP, StoreP since // they may need more than one pass to process. return;
// No need to redefine node's state. // Create a field edge to this node from everything base could point to. assert(
false,
"Op_LoadKlass");
// For everything "adr_base" could point to, create a deferred edge from // this node to each field with the same offset. for (
uint i =
1; i < n->
req() ; i++) {
continue;
// ignore top or inputs which go back to this node // we are only interested in the result projection from a call // For everything "adr_base" could point to, create a deferred edge // to "val" from each field with the same offset. assert(
false,
"Op_ThreadLocal");
tty->
print(
"======== Connection graph for ");
// Print all locals which reference this allocation // Print all fields which reference this allocation