gcm.cpp revision 833
579N/A * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 0N/A * This code is free software; you can redistribute it and/or modify it 0N/A * under the terms of the GNU General Public License version 2 only, as 0N/A * published by the Free Software Foundation. 0N/A * This code is distributed in the hope that it will be useful, but WITHOUT 0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 0N/A * version 2 for more details (a copy is included in the LICENSE file that 0N/A * accompanied this code). 0N/A * You should have received a copy of the GNU General Public License version 0N/A * 2 along with this work; if not, write to the Free Software Foundation, 0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 0N/A * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 0N/A * CA 95054 USA or visit www.sun.com if you need additional information or 0N/A * have any questions. 0N/A// Portions of code courtesy of Clifford Click 0N/A// Optimization - Graph Style 0N/A#
include "incls/_precompiled.incl" 552N/A// To avoid float value underflow 0N/A//----------------------------schedule_node_into_block------------------------- 0N/A// Insert node n into block b. Look for projections of n and make sure they 0N/A // Set basic block of n, Add n to b, 0N/A // After Matching, nearly any old Node may have projections trailing it. 0N/A // These are usually machine-dependent flags. In any case, they might 0N/A // float to another block below this one. Move them up. 0N/A if (
buse != b) {
// In wrong block? 601N/A//----------------------------replace_block_proj_ctrl------------------------- 601N/A// Nodes that have is_block_proj() nodes as their control need to use 601N/A// the appropriate Region for their actual block as their control since 601N/A// the projection will be in a predecessor block. 601N/A if (p !=
NULL && p != n) {
// Control from a block projection? 601N/A // Find trailing Region 601N/A // Search for successor 601N/A // Find which output path belongs to projection 601N/A // Change control to match head of successor basic block 0N/A//------------------------------schedule_pinned_nodes-------------------------- 0N/A// Set the basic block for Nodes pinned into blocks 0N/A // Allocate node stack of size C->unique()+8 to avoid frequent realloc 601N/A // Before setting block replace block_proj control edge 0N/A for(
int i = n->
req() -
1; i >= 0; --i ) {
// For all inputs 0N/A// Assert that new input b2 is dominated by all previous inputs. 0N/A// Check this by seeing that it is dominated by b1, the deepest 0N/A// input observed until b2. 0N/A // Detected an unschedulable graph. Print some nice stuff and die. 0N/A for (
uint j=0; j<n->
len(); j++) {
// For all inputs 0N/A if (
inn ==
NULL)
continue;
// Ignore NULL, missing inputs 0N/A // Find the last input dominated by all other inputs. 0N/A for (
uint k = 0; k < n->
len(); k++) {
// For all inputs 0N/A if (
inn ==
NULL)
continue;
// Ignore NULL, missing inputs 0N/A // The new inb must be dominated by the previous deepb. 0N/A // The various inputs must be linearly ordered in the dom 0N/A // tree, or else there will not be a unique deepest block. 0N/A//------------------------------schedule_early--------------------------------- 0N/A// Find the earliest Block any instruction can be placed in. Some instructions 0N/A// are pinned into Blocks. Unpinned instructions can appear in last block in 0N/A// which all their inputs occur. 0N/A // Allocate stack with enough space to avoid frequent realloc 0N/A // roots.push(_root); _root will be processed among C->top() inputs 0N/A // Use local variables nstack_top_n & nstack_top_i to cache values 0N/A//while_nstack_nonempty: 0N/A // Get parent node and next input's index from stack's top. 601N/A // Fixup some control. Constants without control get attached 601N/A // to root and nodes that use is_block_proj() nodes should be attached 601N/A // to the region that starts their block. 0N/A }
else {
// n->in(0) == NULL 0N/A if (n->
req() ==
1) {
// This guy is a constant with NO inputs? 0N/A // First, visit all inputs and force them to get a block. If an 0N/A // input is already in a block we quit following inputs (to avoid 0N/A // cycles). Instead we put that Node on a worklist to be handled 0N/A // later (since IT'S inputs may not have a block yet). 0N/A bool done =
true;
// Assume all n's inputs will be processed 0N/A while (i < n->
len()) {
// For all inputs 0N/A if (
in ==
NULL)
continue;
// Ignore NULL, missing inputs 0N/A // assert( !visited.test(in->_idx), "did not schedule early" ); 0N/A done =
false;
// Not all n's inputs processed. 0N/A break;
// continue while_nstack_nonempty; 0N/A // All of n's inputs have been processed, complete post-processing. 0N/A // Some instructions are pinned into a block. These include Region, 0N/A // Phi, Start, Return, and other control-dependent instructions and 0N/A // any projections which depend on them. 0N/A // Set earliest legal block. 0N/A // Finished all nodes on stack. 0N/A // Process next node on the worklist 'roots'. 0N/A // Get saved parent node and next input's index. 0N/A//------------------------------dom_lca---------------------------------------- 0N/A// Find least common ancestor in dominator tree 0N/A// LCA is a current notion of LCA, to be raised above 'this'. 0N/A// As a convenient boundary condition, return 'this' if LCA is NULL. 0N/A// Find the LCA of those two nodes. 0N/A while (
LCA !=
anc) {
// Walk both up till they are the same 0N/A//--------------------------raise_LCA_above_use-------------------------------- 0N/A// We are placing a definition, and have been given a def->use edge. 0N/A// The definition must dominate the use, so move the LCA upward in the 0N/A// dominator tree to dominate the use. If the use is a phi, adjust 0N/A// the LCA only with the phi input paths which actually use this def. 0N/A if (
buse ==
NULL)
return LCA;
// Unused killing Projs have no use block 0N/A // Why does not this loop just break after finding the matching input to 0N/A // chains. Means I cannot distinguish, from the def-use direction, which 0N/A // of many use-defs lead from the same use to the same def. That is, this 0N/A // Phi might have several uses of the same def. Each use appears in a 0N/A // different predecessor block. But when I enter here, I cannot distinguish 0N/A // which use-def edge I should find the predecessor block for. So I find 0N/A // them all. Means I do a little extra work if a Phi uses the same value 0N/A//----------------------------raise_LCA_above_marks---------------------------- 0N/A// Return a new LCA that dominates LCA and any of its marked predecessors. 0N/A// Search all my parents up to 'early' (exclusive), looking for predecessors 0N/A// which are marked with the given index. Return the LCA (in the dom tree) 0N/A// of all marked blocks. If there are none marked, return the original 0N/A // Test and set the visited bit. 0N/A // Don't process the current LCA, otherwise the search may terminate early 0N/A // Resume searching at that point, skipping intermediate levels. 215N/A continue;
// Don't mark as visited to avoid early termination. 0N/A // Keep searching through this block's predecessors. 0N/A//--------------------------memory_early_block-------------------------------- 0N/A// This is a variation of find_deepest_input, the heart of schedule_early. 0N/A// Find the "early" block for a load, if we considered only memory and 0N/A// address inputs, that is, if other data inputs were ignored. 0N/A// Because a subset of edges are considered, the resulting block will 0N/A// be earlier (at a shallower dom_depth) than the true schedule_early 0N/A// point of the node. We compute this earlier block as a more permissive 0N/A// site for anti-dependency insertion, but only if subsume_loads is enabled. 0N/A // In the comparison below, add one to account for the control input, 0N/A // which may be null, but always takes up a spot in the in array. 0N/A // This "load" has more inputs than just the memory, base and index inputs. 0N/A // For purposes of checking anti-dependences, we need to start 0N/A // from the early block of only the address portion of the instruction, 0N/A // and ignore other blocks that may have factored into the wider 0N/A // schedule_early calculation. 0N/A // The new inb must be dominated by the previous deepb. 0N/A // The various inputs must be linearly ordered in the dom 0N/A // tree, or else there will not be a unique deepest block. 0N/A//--------------------------insert_anti_dependences--------------------------- 0N/A// A load may need to witness memory that nearby stores can overwrite. 0N/A// For each nearby store, either insert an "anti-dependence" edge 0N/A// from the load to the store, or else move LCA upward to force the 0N/A// load to (eventually) be scheduled in a block above the store. 0N/A// Do not add edges to stores on distinct control-flow paths; 0N/A// only add edges to stores which might interfere. 0N/A// Return the (updated) LCA. 
There will not be any possibly interfering 0N/A// store between the load's "early block" and the updated LCA. 0N/A// Any stores in the updated LCA will have new precedence edges 0N/A// back to the load. The caller is expected to schedule the load 0N/A// in the LCA, in which case the precedence edges will make LCM 0N/A// preserve anti-dependences. The caller may also hoist the load 0N/A// above the LCA, if it is not the early block. 0N/A // Compute the alias index. Loads and stores with different alias indices 0N/A // do not need anti-dependence edges. 0N/A // Load nodes should not consume all of memory. 0N/A // Reporting a bottom type indicates a bug in adlc. 0N/A // If some particular type of node validly consumes all of memory, 0N/A // sharpen the preceding "if" to exclude it, so we can catch bugs here. 0N/A tty->
print_cr(
"*** Possible Anti-Dependence Bug: Load consumes all of memory.");
0N/A "String compare is only known 'load' that does not conflict with any stores");
681N/A "String equals is a 'load' that does not conflict with any stores");
681N/A "String indexOf is a 'load' that does not conflict with any stores");
681N/A "Arrays equals is a 'load' that do not conflict with any stores");
0N/A // It is impossible to spoil this load by putting stores before it, 0N/A // because we know that the stores will never update the value 0N/A // which 'load' must witness. 0N/A // Note the earliest legal placement of 'load', as determined by 0N/A // by the unique point in the dom tree where all memory effects 0N/A // and other inputs are first available. (Computed by schedule_early.) 0N/A // For normal loads, 'early' is the shallowest place (dom graph wise) 0N/A // to look for anti-deps between this load and any store. 0N/A // If we are subsuming loads, compute an "early" block that only considers 0N/A // memory or address inputs. This block may be different than the 0N/A // schedule_early block in that it could be at an even shallower depth in the 0N/A // dominator tree, and allow for a broader discovery of anti-dependences. 0N/A // %%% This extra checking fails because MergeMem nodes are not GVNed. 0N/A // Provide "phi_inputs" to check if every input to a PhiNode is from the 0N/A // original memory state. This indicates a PhiNode for which should not 0N/A // prevent the load from sinking. For such a block, set_raise_LCA_mark 0N/A // may be overly conservative. 0N/A // Mechanism: count inputs seen for each Phi encountered in worklist_store. 0N/A // 'load' uses some memory state; look for users of the same state. 0N/A // Recurse through MergeMem nodes to the stores that use them. 0N/A // Each of these stores is a possible definition of memory 0N/A // that 'load' needs to use. We need to force 'load' 0N/A // to occur before each such store. When the store is in 0N/A // the same block as 'load', we insert an anti-dependence 0N/A // edge load->store. 0N/A // The relevant stores "nearby" the load consist of a tree rooted 0N/A // at initial_mem, with internal nodes of type MergeMem. 
0N/A // Therefore, the branches visited by the worklist are of this form: 0N/A // initial_mem -> (MergeMem ->)* store 0N/A // The anti-dependence constraints apply only to the fringe of this tree. 0N/A // Examine a nearby store to see if it might interfere with our load. 0N/A // MergeMems do not directly have anti-deps. 0N/A // Treat them as internal nodes in a forward tree of memory states, 0N/A // the leaves of which are each a 'possible-def'. 0N/A // Be sure we don't get into combinatorial problems. 0N/A // (Allow phis to be repeated; they can merge two relevant states.) 31N/A for (; j > 0; j--) {
31N/A if (j > 0)
continue;
// already on work list; do not repeat 0N/A // Compute the alias index. Loads and stores with different alias 0N/A // indices do not need anti-dependence edges. Wide MemBar's are 0N/A // anti-dependent on everything (except immutable memories). 0N/A // Most slow-path runtime calls do NOT modify Java memory, but 0N/A // they can block and so write Raw memory. 0N/A // Check for call into the runtime using the Java calling 0N/A // convention (and from there into a wrapper); it has no 0N/A // _method. Can't do this optimization for Native calls because 0N/A // they CAN write to Java memory. 0N/A // These runtime calls do not write to Java visible memory 0N/A // (other than Raw) and so do not require anti-dependence edges. 0N/A // Same for SafePoints: they read/write Raw but only read otherwise. 0N/A // This is basically a workaround for SafePoints only defining control 0N/A // instead of control + memory. 0N/A // Some raw memory, such as the load of "top" at an allocation, 0N/A // can be control dependent on the previous safepoint. See 0N/A // comments in GraphKit::allocate_heap() about control input. 0N/A // Inserting an anti-dep between such a safepoint and a use 0N/A // creates a cycle, and will cause a subsequent failure in 0N/A // local scheduling. (BugId 4919904) 0N/A // (%%% How can a control input be a safepoint and not a projection??) 0N/A // Identify a block that the current load must be above, 0N/A // or else observe that 'store' is all the way up in the 0N/A // earliest legal block for 'load'. In the latter case, 0N/A // immediately insert an anti-dependence edge. 0N/A // 'load' uses memory which is one (or more) of the Phi's inputs. 0N/A // It must be scheduled not before the Phi, but rather before 0N/A // each of the relevant Phi inputs. 0N/A // Instead of finding the LCA of all inputs to a Phi that match 'mem', 0N/A // we mark each corresponding predecessor block and do a combined 0N/A // hoisting operation later (raise_LCA_above_marks). 
0N/A // Do not assert(store_block != early, "Phi merging memory after access") 0N/A // PhiNode may be at start of block 'early' with backedge to 'early' 0N/A // If any predecessor of the Phi matches the load's "early block", 0N/A // we do not need a precedence edge between the Phi and 'load' 605N/A // since the load will be forced into a block preceding the Phi. 788N/A // anti-dependent upon PHI pinned below 'early', no edge needed 0N/A // This assert asks about correct handling of PhiNodes, which may not 0N/A // have all input edges directly from 'mem'. See BugId 4621264 0N/A // Increment by exactly one even if there are multiple copies of 'mem' 0N/A // coming into the phi, because we will run this block several times 0N/A // if there are several copies of 'mem'. (That's how DU iterators work.) 0N/A "Expect at least one phi input will not be from original memory state");
0N/A#
endif //TRACK_PHI_INPUTS 0N/A // 'store' is between the current LCA and earliest possible block. 0N/A // Label its block, and decide later on how to raise the LCA 0N/A // to include the effect on LCA of this store. 0N/A // If this store's block gets chosen as the raised LCA, we 0N/A // will find him on the non_early_stores list and stick him 0N/A // with a precedence edge. 0N/A // (But, don't bother if LCA is already raised all the way.) 0N/A // Found a possibly-interfering store in the load's 'early' block. 0N/A // This means 'load' cannot sink at all in the dominator tree. 0N/A // Add an anti-dep edge, and squeeze 'load' into the highest block. 0N/A // This turns off the process of gathering non_early_stores. 0N/A // (Worklist is now empty; all nearby stores have been visited.) 0N/A // Finished if 'load' must be scheduled in its 'early' block. 0N/A // If we found any stores there, they have already been given 0N/A // precedence edges. 0N/A // We get here only if there are no possibly-interfering stores 0N/A // in the load's 'early' block. Move LCA up above all predecessors 0N/A // which contain stores we have noted. 0N/A // The raised LCA block can be a home to such interfering stores, 0N/A // but its predecessors must not contain any such stores. 0N/A // The raised LCA will be a lower bound for placing the load, 0N/A // preventing the load from sinking past any block containing 0N/A // a store that may invalidate the memory state required by 'load'. 0N/A // Insert anti-dependence edges from 'load' to each store 0N/A // in the non-early LCA block. 0N/A // Mine the non_early_stores list for such stores. 0N/A // add anti_dependence from store to load in its own block 0N/A // Any other stores we found must be either inside the new LCA 0N/A // or else outside the original LCA. In the latter case, they 0N/A // did not interfere with any use of 'load'. 
0N/A // Return the highest block containing stores; any stores 0N/A // within that block have been given anti-dependence edges. 0N/A// This class is used to iterate backwards over the nodes in the graph. 0N/A // Constructor for the iterator 0N/A // Postincrement operator to iterate over the nodes 0N/A// Constructor for the Node_Backward_Iterator 0N/A // The stack should contain exactly the root 0N/A // Clear the visited bits 0N/A// Iterator for the Node_Backward_Iterator 0N/A // If the _stack is empty, then just return NULL: finished. 0N/A // '_stack' is emulating a real _stack. The 'visit-all-users' loop has been 0N/A // made stateless, so I do not need to record the index 'i' on my _stack. 0N/A // Instead I visit all users each time, scanning for unvisited users. 0N/A // I visit unvisited not-anti-dependence users first, then anti-dependent 0N/A // I cycle here when I am entering a deeper level of recursion. 0N/A // The key variable 'self' was set prior to jumping here. 0N/A // Now schedule all uses as late as possible. 0N/A // Schedule all nodes in a post-order visit 0N/A // Scan for unvisited nodes 0N/A // For all uses, schedule late 0N/A // Skip already visited children 0N/A // do not traverse backward control edges 0N/A // Phi nodes always precede uses in a basic block 0N/A // Check for possible-anti-dependent 0N/A break;
// Not visited, not anti-dep; schedule it NOW 0N/A // Did I find an unvisited not-anti-dependent Node? 0N/A break;
// All done with children; post-visit 'self' 0N/A // Visit the unvisited Node. Contains the obvious push to 0N/A // indicate I'm entering a deeper level of recursion. I push the 0N/A // old state onto the _stack and set a new state and loop (recurse). 0N/A }
// End recursion loop 0N/A//------------------------------ComputeLatenciesBackwards---------------------- 0N/A// Compute the latency of all the instructions. 0N/A tty->
print(
"\n#---- ComputeLatenciesBackwards ----\n");
0N/A // Walk over all the nodes from last to first 0N/A // Set the latency for the definitions of this instruction 0N/A}
// end ComputeLatenciesBackwards 0N/A//------------------------------partial_latency_of_defs------------------------ 0N/A// Compute the latency impact of this node on all defs. This computes 0N/A// a number that increases as we approach the beginning of the routine. 0N/A // Set the latency for this instruction 0N/A tty->
print(
"# latency_to_inputs: node_latency[%d] = %d for node",
0N/A // Walk backwards thru projections 0N/A // If the defining block is not known, assume it is ok 0N/A tty->
print_cr(
"# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
0N/A//------------------------------latency_from_use------------------------------- 0N/A// Compute the latency of a specific use 0N/A // If self-reference, return no latency 0N/A // If the use is not a projection, then it is simple... 0N/A // Change this if we want local latencies 0N/A // This is a projection, just grab the latency of the use(s) 0N/A//------------------------------latency_from_uses------------------------------ 0N/A// Compute the latency of this instruction relative to all of its uses. 0N/A// This computes a number that increases as we approach the beginning of the 0N/A // Set the latency for this instruction 0N/A tty->
print(
"# latency_from_outputs: node_latency[%d] = %d for node",
0N/A//------------------------------hoist_to_cheaper_block------------------------- 0N/A// Pick a block for node self, between early and LCA, that is a cheaper 0N/A// alternative to LCA. 0N/A // Turn off latency scheduling if scheduling is just plain off 0N/A // Do not hoist (to cover latency) instructions which target a 0N/A // single register. Hoisting stretches the live range of the 0N/A // single register and may force spilling. 0N/A tty->
print_cr(
"# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
0N/A // Walk up the dominator tree from LCA (Lowest common ancestor) to 0N/A // the earliest legal location. Capture the least execution frequency. 0N/A // Bailout without retry 0N/A // Don't hoist machine instructions to the root basic block 0N/A tty->
print_cr(
"# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
0N/A // because they may end up above other uses of their phi forcing 0N/A // their result register to be different from their input. 0N/A // See if the latency needs to be updated 0N/A//------------------------------schedule_late----------------------------------- 0N/A// Now schedule all codes as LATE as possible. This is the LCA in the 0N/A// dominator tree of all USES of a value. Pick the block with the least 0N/A// loop nesting depth that is lowest in the dominator tree. 0N/A // Walk over all the nodes from last to first 0N/A // Top node goes in bb #2 with other constants. 0N/A // It must be special-cased, because it has no out edges. 0N/A // No uses, just terminate 0N/A continue;
// Must be a dead machine projection 0N/A // If node is pinned in the block, then no scheduling can be done. 0N/A // Don't move exception creation 0N/A // Don't move CheckCastPP nodes away from their input, if the input 0N/A // is a rawptr (5071820). 0N/A // Gather LCA of all uses 0N/A // For all uses, find LCA 0N/A }
// (Hide defs of imax, i from rest of block.) 0N/A // Place temps in the block of their use. This isn't a 0N/A // requirement for correctness but it reduces useless 0N/A // interference between temps and other nodes. 0N/A // Check if 'self' could be anti-dependent on memory 0N/A // Hoist LCA above possible-defs and insert anti-dependences to 0N/A // defs in new LCA block. 0N/A // Somehow the LCA has moved above the earliest legal point. 0N/A // (One way this can happen is via memory_early_block.) 0N/A // Retry with subsume_loads == false 0N/A // If this is the first failure, the sentinel string will "stick" 0N/A // to the Compile object, and the C2Compiler will see it and retry. 0N/A // Bailout without retry when (early->_dom_depth > LCA->_dom_depth) 0N/A // If there is no opportunity to hoist, then we're done. 0N/A // Must clone guys stay next to use; no hoisting allowed. 0N/A // Also cannot hoist guys that alter memory or are otherwise not 0N/A // allocatable (hoisting can make a value live longer, leading to 0N/A // anti and output dependency problems which are normally resolved 0N/A // by the register allocator giving everyone a different register). 0N/A // Now find the block with the least execution frequency. 0N/A // Start at the latest schedule and work up to the earliest schedule 0N/A // in the dominator tree. Thus the Node will dominate all its uses. 0N/A // Just use the LCA of the uses. 0N/A // Put the node into target block 0N/A // since precedence edges are only inserted when we're sure they 0N/A // are needed make sure that after placement in a block we don't 0N/A // need any new precedence edges. 0N/A }
// Loop until all nodes have been visited 0N/A}
// end ScheduleLate 0N/A//------------------------------GlobalCodeMotion------------------------------- 0N/A // Initialize the bbs.map for things on the proj_list 0N/A // Set the basic block for Nodes pinned into blocks 0N/A // Find the earliest Block any instruction can be placed in. Some 0N/A // instructions are pinned into Blocks. Unpinned instructions can 0N/A // appear in last block in which all their inputs occur. 0N/A // Bailout without retry 0N/A // Build Def-Use edges. 0N/A // Compute the latency information (via backwards walk) for all the 0N/A // instructions in the graph 0N/A // Now schedule all codes as LATE as possible. This is the LCA in the 0N/A // dominator tree of all USES of a value. Pick the block with the least 0N/A // loop nesting depth that is lowest in the dominator tree. 0N/A // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) 0N/A // schedule_late fails only when graph is incorrect. 0N/A tty->
print(
"\n---- Detect implicit null checks ----\n");
0N/A // Detect implicit-null-check opportunities. Basically, find NULL checks 0N/A // with suitable memory ops nearby. Use the memory op to do the NULL check. 0N/A // I can generate a memory op if there is not one nearby. 0N/A // Don't do it for natives, adapters, or runtime stubs 0N/A // ...and don't do it when there have been too many traps, globally. 0N/A // By reversing the loop direction we get a very minor gain on mpegaudio. 0N/A // Feel free to revert to a forward loop for clarity. 0N/A // The implicit_null_check will only perform the transformation 0N/A // if the null branch is truly uncommon, *and* it leads to an 0N/A // uncommon trap. Combined with the too_many_traps guards 0N/A // above, this prevents SEGV storms reported in 6366351, 0N/A // by recompiling offending methods without this optimization. 0N/A // Schedule locally. Right now a simple topological sort. 0N/A // Later, do a real latency aware scheduler. 0N/A // If we inserted any instructions between a Call and his CatchNode, 0N/A // clone the instructions on all paths below the Catch. 0N/A//------------------------------Estimate_Block_Frequency----------------------- 0N/A// Estimate block frequencies based on IfNode probabilities. 418N/A // Force conditional branches leading to uncommon traps to be unlikely, 418N/A // not because we get to the uncommon_trap with less relative frequency, 418N/A // but because an uncommon_trap typically causes a deopt, so we only get 0N/A // Create the loop tree and calculate loop depth. 0N/A // Compute block frequency of each block, relative to a single loop entry. 0N/A // Adjust all frequencies to be relative to a single method entry 673N/A // Save outmost loop frequency for LRG frequency threshold 0N/A // force paths ending at uncommon traps to be infrequent 0N/A//----------------------------create_loop_tree-------------------------------- 0N/A// Create a loop tree from the CFG 0N/A // Check that _loop field are clear...we could clear them if not. 
0N/A // Sanity check that the RPO numbering is reflected in the _blocks array. 0N/A // It doesn't have to be for the loop tree to be built, but if it is not, 0N/A // then the blocks have been reordered since dom graph building...which 0N/A // may question the RPO numbering 0N/A // Assign blocks to loops 0N/A // Defensively filter out Loop nodes for non-single-entry loops. 0N/A // For all reasonable loops, the head occurs before the tail in RPO. 0N/A // The tail and (recursive) predecessors of the tail 0N/A // are made members of a new loop. 0N/A // Add to nloop so push_pred() will skip over inner loops 0N/A // Create a member list for each loop consisting 0N/A // of both blocks and (immediate child) loops. 0N/A // Not assigned to a loop. Add it to the method's pseudo loop. 0N/A // Not a nested loop. Make it a child of the method's pseudo loop. 0N/A // Add nested loop to member list of parent loop. 0N/A//------------------------------push_pred-------------------------------------- 0N/A // Filter out blocks for non-single-entry loops. 0N/A // For all reasonable loops, the head occurs before the tail in RPO. 0N/A // Make pred's loop be a child 0N/A // Continue with loop entry predecessor. 0N/A//------------------------------add_nested_loop-------------------------------- 0N/A// Make cl a child of the current loop in the loop tree. 0N/A//------------------------------compute_loop_depth----------------------------- 0N/A// Store the loop depth in each CFGLoop object. 0N/A// Recursively walk the children to do the same for them. 0N/A//------------------------------compute_freq----------------------------------- 0N/A// Compute the frequency of each block and loop, relative to a single entry 0N/A// into the dominating loop head. 0N/A // Bottom up traversal of loop tree (visit inner loops first.) 0N/A // Set loop head frequency to 1.0, then transitively 0N/A // compute frequency for all successors in the loop, 0N/A // as well as for each exit edge. 
Inner loops are 0N/A // treated as single blocks with loop exit targets 0N/A // as the successor blocks. 0N/A // Nested loops first 0N/A // For all loops other than the outer, "method" loop, 0N/A // sum and normalize the exit probability. The "method" loop 0N/A // should keep the initial exit probability of 1, so that 0N/A // inner blocks do not get erroneously scaled. 0N/A // Total the exit probabilities for this loop. 0N/A // Normalize the exit probabilities. Until now, the 0N/A // probabilities estimate the possibility of exit per 0N/A // a single loop iteration; afterward, they estimate 0N/A // the probability of exit per loop entry. 418N/A // Save the total, but guard against unreasonable probability, 0N/A // as the value is used to estimate the loop trip count. 0N/A // An infinite trip count would blur relative block 0N/A//------------------------------succ_prob------------------------------------- 0N/A// Determine the probability of reaching successor 'i' from the receiver block. 308N/A // Can only reach here if called after lcm. The original Op_If is gone, 308N/A // so we attempt to infer the probability from one or both of the 308N/A // If either successor has only one predecessor, then the 605N/A // probability estimate can be derived using the 308N/A // relative frequency of the successor and this block. 308N/A // Estimate using both successor frequencies 0N/A // Switch on branch type 0N/A // Conditionals pass on only part of their frequency 0N/A // If succ[i] is the FALSE branch, invert path info 0N/A // Divide the frequency between all successors evenly 0N/A // Fall-thru path gets the lion's share. 
0N/A // Presume exceptional paths are equally unlikely 0N/A // Pass frequency straight thru to target 0N/A // Do not push out freq to root block 418N/A//------------------------------num_fall_throughs----------------------------- 418N/A// Return the number of fall-through candidates for a block 418N/A // In theory, either side can fall-thru, for simplicity sake, 418N/A // let's say only the false branch can now. 418N/A // Switch on branch type 418N/A//------------------------------succ_fall_through----------------------------- 418N/A// Return true if a specific successor could be fall-through target. 418N/A // In theory, either side can fall-thru, for simplicity sake, 418N/A // let's say only the false branch can now. 418N/A // Switch on branch type 418N/A//------------------------------update_uncommon_branch------------------------ 418N/A// Update the probability of a two-branch to be uncommon 418N/A // Which successor is ub? 418N/A // If ub is the true path, make the probability small, else 418N/A // ub is the false path, and make the probability large 418N/A // Get existing probability 0N/A//------------------------------update_succ_freq------------------------------- 605N/A// Update the appropriate frequency associated with block 'b', a successor of 0N/A// a block in this loop. 0N/A // back branch within the loop 0N/A // Do nothing now, the loop carried frequency will be 0N/A // adjusted later in scale_freq(). 0N/A // simple branch within the loop 0N/A // branch is exit from this loop 0N/A // branch into nested loop 0N/A//------------------------------in_loop_nest----------------------------------- 0N/A// Determine if block b is in the receiver's loop nest. 0N/A//------------------------------scale_freq------------------------------------- 0N/A// Scale frequency of loops and blocks by trip counts from outer loops 0N/A// Do a top down traversal of loop tree (visit outer loops first.) 
673N/A// Frequency of outer loop 0N/A//------------------------------dump_tree-------------------------------------- 0N/A//------------------------------dump-------------------------------------------