// gcm.cpp (revision 601)
/*
 * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

// Portions of code courtesy of Clifford Click

// Optimization - Graph Style
#include "incls/_precompiled.incl"

// To avoid float value underflow

//----------------------------schedule_node_into_block-------------------------
// Insert node n into block b.  Look for projections of n and make sure they
// are in b also.
  // Set basic block of n, Add n to b,

  // After Matching, nearly any old Node may have projections trailing it.
  // These are usually machine-dependent flags.  In any case, they might
  // float to another block below this one.  Move them up.
      if (buse != b) {              // In wrong block?

//----------------------------replace_block_proj_ctrl-------------------------
// Nodes that have is_block_proj() nodes as their control need to use
// the appropriate Region for their actual block as their control since
// the projection will be in a predecessor block.
  if (p != NULL && p != n) {    // Control from a block projection?
    // Find which output path belongs to projection
    // Change control to match head of successor basic block

//------------------------------schedule_pinned_nodes--------------------------
// Set the basic block for Nodes pinned into blocks
  // Allocate node stack of size C->unique()+8 to avoid frequent realloc
      assert( n->in(0), "pinned Node must have Control" );
      // Before setting block replace block_proj control edge
      for( int i = n->req() - 1; i >= 0; --i ) {  // For all inputs

// Assert that new input b2 is dominated by all previous inputs.
// Check this by seeing that it is dominated by b1, the deepest
// input observed until b2.
  // Detected an unschedulable graph.  Print some nice stuff and die.
  for (uint j=0; j<n->len(); j++) {     // For all inputs
    if (inn == NULL)  continue;         // Ignore NULL, missing inputs
  assert(false, "unschedulable graph");
// Find the last input dominated by all other inputs.
  for (uint k = 0; k < n->len(); k++) {  // For all inputs
    if (inn == NULL)  continue;          // Ignore NULL, missing inputs
    // The new inb must be dominated by the previous deepb.
    // The various inputs must be linearly ordered in the dom
    // tree, or else there will not be a unique deepest block.
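// Illustrative sketch of the "deepest input" selection described above.  The
// SBlock/SNode types and the function below are hypothetical simplifications
// for illustration only, not HotSpot's real Block/Node classes.
namespace sketch_find_deepest_input {
  struct SBlock { SBlock* idom; unsigned dom_depth; };
  struct SNode  { SNode** ins; unsigned nins; SBlock* block; };

  // Pick the input whose block is deepest in the dominator tree.  This is only
  // well defined because, as the comments above require, the inputs' blocks
  // are linearly ordered in the dominator tree.
  SBlock* find_deepest_input(const SNode* n) {
    SBlock* deepb = 0;
    unsigned deepb_dom_depth = 0;
    for (unsigned k = 0; k < n->nins; k++) {
      const SNode* inn = n->ins[k];
      if (inn == 0)  continue;                 // ignore NULL, missing inputs
      SBlock* inb = inn->block;
      if (deepb == 0 || inb->dom_depth > deepb_dom_depth) {
        deepb = inb;                           // new deepest block so far
        deepb_dom_depth = inb->dom_depth;
      }
    }
    return deepb;
  }
}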
//------------------------------schedule_early---------------------------------
// Find the earliest Block any instruction can be placed in.  Some instructions
// are pinned into Blocks.  Unpinned instructions can appear in the last block
// in which all their inputs occur.
  // Allocate stack with enough space to avoid frequent realloc
  // roots.push(_root); _root will be processed among C->top() inputs
    // Use local variables nstack_top_n & nstack_top_i to cache values
    // Get parent node and next input's index from stack's top.

      // Fixup some control.  Constants without control get attached
      // to root and nodes that use is_block_proj() nodes should be attached
      // to the region that starts their block.
      if (in0 != NULL) {              // Control-dependent?
      } else {                        // n->in(0) == NULL
        if (n->req() == 1) {          // This guy is a constant with NO inputs?

      // First, visit all inputs and force them to get a block.  If an
      // input is already in a block we quit following inputs (to avoid
      // cycles).  Instead we put that Node on a worklist to be handled
      // later (since ITS inputs may not have a block yet).
      bool done = true;               // Assume all n's inputs will be processed
      while (i < n->len()) {          // For all inputs
        if (in == NULL)  continue;    // Ignore NULL, missing inputs
        // assert( !visited.test(in->_idx), "did not schedule early" );
          nstack.push(n, i);          // Save parent node and next input's index.
          done = false;               // Not all n's inputs processed.
          break; // continue while_nstack_nonempty;
        } else if (!is_visited) {     // Input not yet visited?
          roots.push(in);             // Visit this guy later, using worklist

      // All of n's inputs have been processed, complete post-processing.

      // Some instructions are pinned into a block.  These include Region,
      // Phi, Start, Return, and other control-dependent instructions and
      // any projections which depend on them.
      // Set earliest legal block.

    // Finished all nodes on stack.
    // Process next node on the worklist 'roots'.
    // Get saved parent node and next input's index.
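// Illustrative sketch of the earliest-block computation described above: every
// unpinned node lands in the deepest (dominator-tree-wise) block among the
// blocks of its inputs.  The real code walks the graph iteratively with an
// explicit node stack and a 'roots' worklist; this sketch recurses for
// brevity.  All types and names here are hypothetical simplifications.
namespace sketch_schedule_early {
  struct SBlock { unsigned dom_depth; };
  struct SNode  {
    SNode** ins; unsigned nins;
    SBlock* early;     // earliest legal block; pre-set for pinned nodes
  };

  SBlock* schedule_early(SNode* n, SBlock* root_block) {
    // Pinned nodes (Region, Phi, Start, ...) already have a block, which also
    // stops the walk at loop boundaries.
    if (n->early != 0)  return n->early;
    SBlock* deepb = root_block;              // constants with no inputs go to the root
    for (unsigned i = 0; i < n->nins; i++) { // visit all inputs, force them to get a block
      SNode* in = n->ins[i];
      if (in == 0)  continue;                // ignore NULL, missing inputs
      SBlock* inb = schedule_early(in, root_block);
      if (inb->dom_depth > deepb->dom_depth)
        deepb = inb;                         // deepest input block seen so far
    }
    n->early = deepb;                        // earliest legal block for n
    return deepb;
  }
}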
//------------------------------dom_lca----------------------------------------
// Find least common ancestor in dominator tree
// LCA is a current notion of LCA, to be raised above 'this'.
// As a convenient boundary condition, return 'this' if LCA is NULL.
  // Find the LCA of those two nodes.
    anc = anc->_idom;           // Walk up till anc is as high as LCA
    LCA = LCA->_idom;           // Walk up till LCA is as high as anc
  while (LCA != anc) {          // Walk both up till they are the same
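// Illustrative sketch of the dominator-tree LCA walk performed by dom_lca:
// first equalize depths, then walk both blocks up in lockstep until they meet.
// The SBlock type is a hypothetical stand-in for the real Block class.
namespace sketch_dom_lca {
  struct SBlock { SBlock* idom; unsigned dom_depth; };

  SBlock* dom_lca(SBlock* self, SBlock* LCA) {
    if (LCA == 0 || LCA == self)  return self;  // convenient boundary condition
    SBlock* anc = self;
    while (anc->dom_depth > LCA->dom_depth)
      anc = anc->idom;                 // walk up till anc is as high as LCA
    while (LCA->dom_depth > anc->dom_depth)
      LCA = LCA->idom;                 // walk up till LCA is as high as anc
    while (LCA != anc) {               // walk both up till they are the same
      anc = anc->idom;
      LCA = LCA->idom;
    }
    return LCA;
  }
}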
//--------------------------raise_LCA_above_use--------------------------------
// We are placing a definition, and have been given a def->use edge.
// The definition must dominate the use, so move the LCA upward in the
// dominator tree to dominate the use.  If the use is a phi, adjust
// the LCA only with the phi input paths which actually use this def.
  if (buse == NULL)  return LCA;   // Unused killing Projs have no use block

  // Why does not this loop just break after finding the matching input to
  // the Phi?  Well...it's like this.  I do not have true def-use/use-def
  // chains.  Means I cannot distinguish, from the def-use direction, which
  // of many use-defs lead from the same use to the same def.  That is, this
  // Phi might have several uses of the same def.  Each use appears in a
  // different predecessor block.  But when I enter here, I cannot distinguish
  // which use-def edge I should find the predecessor block for.  So I find
  // them all.  Means I do a little extra work if a Phi uses the same value
  // more than once.
  for (uint j=1; j<pmax; j++) {        // For all inputs
    if (use->in(j) == def) {           // Found matching input?
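// Illustrative sketch of raising the LCA over one def->use edge, as described
// above: an ordinary use contributes its own block, while a Phi contributes
// the predecessor block behind every input slot that carries this def.  The
// types and the preds[] indexing are hypothetical simplifications; preds[j]
// stands for the predecessor block feeding the Phi's j-th input.
namespace sketch_raise_LCA_above_use {
  struct SBlock { SBlock* idom; unsigned dom_depth; SBlock** preds; };
  struct SNode  { SNode** ins; unsigned nins; SBlock* block; bool is_phi; };

  // Same walk as the dom_lca sketch above, repeated to keep this self-contained.
  SBlock* dom_lca(SBlock* a, SBlock* b) {
    if (a == 0)  return b;
    if (b == 0 || b == a)  return a;
    while (a->dom_depth > b->dom_depth)  a = a->idom;
    while (b->dom_depth > a->dom_depth)  b = b->idom;
    while (a != b) { a = a->idom; b = b->idom; }
    return a;
  }

  SBlock* raise_LCA_above_use(SBlock* LCA, SNode* use, SNode* def) {
    SBlock* buse = use->block;
    if (buse == 0)  return LCA;            // unused killing Projs have no use block
    if (!use->is_phi)  return dom_lca(LCA, buse);
    // A Phi may receive the same def on several inputs, each arriving from a
    // different predecessor block, so all matching inputs are examined.
    for (unsigned j = 1; j < use->nins; j++) {
      if (use->ins[j] == def)              // found matching input?
        LCA = dom_lca(LCA, buse->preds[j]);
    }
    return LCA;
  }
}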
//----------------------------raise_LCA_above_marks----------------------------
// Return a new LCA that dominates LCA and any of its marked predecessors.
// Search all my parents up to 'early' (exclusive), looking for predecessors
// which are marked with the given index.  Return the LCA (in the dom tree)
// of all marked blocks.  If there are none marked, return the original LCA.
    if (mid == early)  continue;  // stop searching here

    // Test and set the visited bit.

    // Don't process the current LCA, otherwise the search may terminate early
      if (LCA == early)  break;   // stop searching everywhere

      // Resume searching at that point, skipping intermediate levels.
        continue; // Don't mark as visited to avoid early termination.
      // Keep searching through this block's predecessors.
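// Illustrative sketch of the marked-predecessor search described above.
// Starting from the current LCA, predecessor blocks are searched up to (but
// not including) 'early'; whenever a block carrying the mark is found, the
// LCA is raised to dominate it and the search resumes from the raised point.
// All types and fields are hypothetical simplifications.
#include <vector>

namespace sketch_raise_LCA_above_marks {
  struct SBlock {
    SBlock* idom; unsigned dom_depth;
    std::vector<SBlock*> preds;
    unsigned raise_LCA_mark;       // set by the caller on interesting blocks
    unsigned raise_LCA_visited;    // visited stamp for the current search
  };

  SBlock* dom_lca(SBlock* a, SBlock* b) {
    if (b == 0 || b == a)  return a;
    while (a->dom_depth > b->dom_depth)  a = a->idom;
    while (b->dom_depth > a->dom_depth)  b = b->idom;
    while (a != b) { a = a->idom; b = b->idom; }
    return a;
  }

  SBlock* raise_LCA_above_marks(SBlock* LCA, unsigned mark, SBlock* early) {
    std::vector<SBlock*> worklist;
    worklist.push_back(LCA);
    while (!worklist.empty()) {
      SBlock* mid = worklist.back();  worklist.pop_back();
      if (mid == early)  continue;                    // stop searching here
      if (mid->raise_LCA_visited == mark)  continue;  // test the visited bit
      if (mid != LCA && mid->raise_LCA_mark == mark) {
        LCA = dom_lca(mid, LCA);                      // raise LCA above this marked block
        if (LCA == early)  break;                     // stop searching everywhere
        worklist.push_back(LCA);                      // resume search from the raised point
        if (LCA == mid)
          continue;        // don't mark as visited, to avoid early termination
      } else {
        for (size_t j = 0; j < mid->preds.size(); j++)
          worklist.push_back(mid->preds[j]);          // keep searching through predecessors
      }
      mid->raise_LCA_visited = mark;                  // set the visited bit
    }
    return LCA;
  }
}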
//--------------------------memory_early_block--------------------------------
// This is a variation of find_deepest_input, the heart of schedule_early.
// Find the "early" block for a load, if we considered only memory and
// address inputs, that is, if other data inputs were ignored.
//
// Because a subset of edges is considered, the resulting block will
// be earlier (at a shallower dom_depth) than the true schedule_early
// point of the node.  We compute this earlier block as a more permissive
// site for anti-dependency insertion, but only if subsume_loads is enabled.
  // In the comparison below, add one to account for the control input,
  // which may be null, but always takes up a spot in the in array.
    // This "load" has more inputs than just the memory, base and index inputs.
    // For purposes of checking anti-dependences, we need to start
    // from the early block of only the address portion of the instruction,
    // and ignore other blocks that may have factored into the wider
    // schedule_early calculation.
      // The new inb must be dominated by the previous deepb.
      // The various inputs must be linearly ordered in the dom
      // tree, or else there will not be a unique deepest block.

//--------------------------insert_anti_dependences---------------------------
// A load may need to witness memory that nearby stores can overwrite.
// For each nearby store, either insert an "anti-dependence" edge
// from the load to the store, or else move LCA upward to force the
// load to (eventually) be scheduled in a block above the store.
//
// Do not add edges to stores on distinct control-flow paths;
// only add edges to stores which might interfere.
//
// Return the (updated) LCA.  There will not be any possibly interfering
// store between the load's "early block" and the updated LCA.
// Any stores in the updated LCA will have new precedence edges
// back to the load.  The caller is expected to schedule the load
// in the LCA, in which case the precedence edges will make LCM
// preserve anti-dependences.  The caller may also hoist the load
// above the LCA, if it is not the early block.
  // Compute the alias index.  Loads and stores with different alias indices
  // do not need anti-dependence edges.
  // Load nodes should not consume all of memory.
  // Reporting a bottom type indicates a bug in adlc.
  // If some particular type of node validly consumes all of memory,
  // sharpen the preceding "if" to exclude it, so we can catch bugs here.
    tty->print_cr("*** Possible Anti-Dependence Bug: Load consumes all of memory.");
         "String compare is only known 'load' that does not conflict with any stores");

      // It is impossible to spoil this load by putting stores before it,
      // because we know that the stores will never update the value
      // which 'load' must witness.

  // Note the earliest legal placement of 'load', as determined
  // by the unique point in the dom tree where all memory effects
  // and other inputs are first available.  (Computed by schedule_early.)
  // For normal loads, 'early' is the shallowest place (dom graph wise)
  // to look for anti-deps between this load and any store.
  //
  // If we are subsuming loads, compute an "early" block that only considers
  // memory or address inputs.  This block may be different than the
  // schedule_early block in that it could be at an even shallower depth in the
  // dominator tree, and allow for a broader discovery of anti-dependences.
  // %%% This extra checking fails because MergeMem nodes are not GVNed.

  // Provide "phi_inputs" to check if every input to a PhiNode is from the
  // original memory state.  This indicates a PhiNode which should not
  // prevent the load from sinking.  For such a block, set_raise_LCA_mark
  // may be overly conservative.
  // Mechanism: count inputs seen for each Phi encountered in worklist_store.

  // 'load' uses some memory state; look for users of the same state.
  // Recurse through MergeMem nodes to the stores that use them.

  // Each of these stores is a possible definition of memory
  // that 'load' needs to use.  We need to force 'load'
  // to occur before each such store.  When the store is in
  // the same block as 'load', we insert an anti-dependence
  // edge load->store.

  // The relevant stores "nearby" the load consist of a tree rooted
  // at initial_mem, with internal nodes of type MergeMem.
  // Therefore, the branches visited by the worklist are of this form:
  //    initial_mem -> (MergeMem ->)* store
  // The anti-dependence constraints apply only to the fringe of this tree.

    // Examine a nearby store to see if it might interfere with our load.

      // MergeMems do not directly have anti-deps.
      // Treat them as internal nodes in a forward tree of memory states,
      // the leaves of which are each a 'possible-def'.
      mem = store;   // It's not a possibly interfering store.

    // Be sure we don't get into combinatorial problems.
    // (Allow phis to be repeated; they can merge two relevant states.)
      if (j > 0)  continue; // already on work list; do not repeat
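// Illustrative sketch of the per-store decision that the rest of this routine
// builds up to: a possibly interfering store found in the load's 'early' block
// pins the load there and gets an immediate precedence (anti-dependence) edge,
// while a store strictly below 'early' only has its block marked so the LCA
// can later be raised above all marked blocks at once (raise_LCA_above_marks).
// Everything here -- the types, add_prec(), set_raise_LCA_mark() -- is a
// hypothetical, heavily reduced model; the real code also walks MergeMem
// fan-out, filters by alias index, and treats Phi inputs specially.
namespace sketch_insert_anti_dependences {
  struct SBlock { unsigned raise_LCA_mark; };
  struct SNode  { SBlock* block; };

  // Stand-in helpers: record that 'store' must be scheduled after 'load', and
  // note a block for the later combined LCA-raising pass.
  void add_prec(SNode* store, SNode* load) { /* precedence edge: store follows load */ }
  void set_raise_LCA_mark(SBlock* b, unsigned load_index) { b->raise_LCA_mark = load_index; }

  // Returns true if the load is now pinned in its early block.
  bool handle_possible_def(SNode* load, SNode* store, SBlock* early, unsigned load_index) {
    SBlock* store_block = store->block;
    if (store_block == early) {
      // Interfering store in the load's earliest legal block: the load cannot
      // sink at all, so add the anti-dependence edge immediately.
      add_prec(store, load);
      return true;
    }
    // Otherwise just mark the store's block; the caller raises the LCA above
    // all marked blocks afterwards and then adds precedence edges to the
    // stores left in the final LCA block.
    set_raise_LCA_mark(store_block, load_index);
    return false;
  }
}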
    // Compute the alias index.  Loads and stores with different alias
    // indices do not need anti-dependence edges.  Wide MemBar's are
    // anti-dependent on everything (except immutable memories).

    // Most slow-path runtime calls do NOT modify Java memory, but
    // they can block and so write Raw memory.
      // Check for call into the runtime using the Java calling
      // convention (and from there into a wrapper); it has no
      // _method.  Can't do this optimization for Native calls because
      // they CAN write to Java memory.

        // These runtime calls do not write to Java visible memory
        // (other than Raw) and so do not require anti-dependence edges.

      // Same for SafePoints: they read/write Raw but only read otherwise.
      // This is basically a workaround for SafePoints only defining control
      // instead of control + memory.

      // Some raw memory, such as the load of "top" at an allocation,
      // can be control dependent on the previous safepoint.  See
      // comments in GraphKit::allocate_heap() about control input.
      // Inserting an anti-dep between such a safepoint and a use
      // creates a cycle, and will cause a subsequent failure in
      // local scheduling.  (BugId 4919904)
      // (%%% How can a control input be a safepoint and not a projection??)

    // Identify a block that the current load must be above,
    // or else observe that 'store' is all the way up in the
    // earliest legal block for 'load'.  In the latter case,
    // immediately insert an anti-dependence edge.

      // 'load' uses memory which is one (or more) of the Phi's inputs.
      // It must be scheduled not before the Phi, but rather before
      // each of the relevant Phi inputs.
      //
      // Instead of finding the LCA of all inputs to a Phi that match 'mem',
      // we mark each corresponding predecessor block and do a combined
      // hoisting operation later (raise_LCA_above_marks).
      //
      // Do not assert(store_block != early, "Phi merging memory after access")
      // PhiNode may be at start of block 'early' with backedge to 'early'
        if (store->in(j) == mem) {   // Found matching input?
          // If any predecessor of the Phi matches the load's "early block",
          // we do not need a precedence edge between the Phi and 'load'
          // since the load will be forced into a block preceding the Phi.

          // This assert asks about correct handling of PhiNodes, which may not
          // have all input edges directly from 'mem'.  See BugId 4621264
          // Increment by exactly one even if there are multiple copies of 'mem'
          // coming into the phi, because we will run this block several times
          // if there are several copies of 'mem'.  (That's how DU iterators work.)
             "Expect at least one phi input will not be from original memory state");
#endif // TRACK_PHI_INPUTS
    // 'store' is between the current LCA and earliest possible block.
    // Label its block, and decide later on how to raise the LCA
    // to include the effect on LCA of this store.
    // If this store's block gets chosen as the raised LCA, we
    // will find him on the non_early_stores list and stick him
    // with a precedence edge.
    // (But, don't bother if LCA is already raised all the way.)

    // Found a possibly-interfering store in the load's 'early' block.
    // This means 'load' cannot sink at all in the dominator tree.
    // Add an anti-dep edge, and squeeze 'load' into the highest block.
      // This turns off the process of gathering non_early_stores.

  // (Worklist is now empty; all nearby stores have been visited.)

  // Finished if 'load' must be scheduled in its 'early' block.
  // If we found any stores there, they have already been given
  // precedence edges.

  // We get here only if there are no possibly-interfering stores
  // in the load's 'early' block.  Move LCA up above all predecessors
  // which contain stores we have noted.
  //
  // The raised LCA block can be a home to such interfering stores,
  // but its predecessors must not contain any such stores.
  //
  // The raised LCA will be a lower bound for placing the load,
  // preventing the load from sinking past any block containing
  // a store that may invalidate the memory state required by 'load'.

    // Insert anti-dependence edges from 'load' to each store
    // in the non-early LCA block.
    // Mine the non_early_stores list for such stores.
        // add anti_dependence from store to load in its own block

  // Any other stores we found must be either inside the new LCA
  // or else outside the original LCA.  In the latter case, they
  // did not interfere with any use of 'load'.

  // Return the highest block containing stores; any stores
  // within that block have been given anti-dependence edges.

// This class is used to iterate backwards over the nodes in the graph.

// Constructor for the iterator
// Postincrement operator to iterate over the nodes

// Constructor for the Node_Backward_Iterator
// The stack should contain exactly the root
  // Clear the visited bits

// Iterator for the Node_Backward_Iterator
  // If the _stack is empty, then just return NULL: finished.

  // '_stack' is emulating a real _stack.  The 'visit-all-users' loop has been
  // made stateless, so I do not need to record the index 'i' on my _stack.
  // Instead I visit all users each time, scanning for unvisited users.
  // I visit unvisited not-anti-dependence users first, then anti-dependent
  // users.

  // I cycle here when I am entering a deeper level of recursion.
  // The key variable 'self' was set prior to jumping here.

    // Now schedule all uses as late as possible.

    // Schedule all nodes in a post-order visit
      // Scan for unvisited nodes
      // For all uses, schedule late
        // Skip already visited children
        // do not traverse backward control edges
        // Phi nodes always precede uses in a basic block
        // Check for possible-anti-dependent
          break;      // Not visited, not anti-dep; schedule it NOW

      // Did I find an unvisited not-anti-dependent Node?
        break;        // All done with children; post-visit 'self'
      // Visit the unvisited Node.  Contains the obvious push to
      // indicate I'm entering a deeper level of recursion.  I push the
      // old state onto the _stack and set a new state and loop (recurse).

//------------------------------ComputeLatenciesBackwards----------------------
// Compute the latency of all the instructions.
    tty->print("\n#---- ComputeLatenciesBackwards ----\n");
  // Walk over all the nodes from last to first
    // Set the latency for the definitions of this instruction
} // end ComputeLatenciesBackwards

//------------------------------partial_latency_of_defs------------------------
// Compute the latency impact of this node on all defs.  This computes
// a number that increases as we approach the beginning of the routine.
  // Set the latency for this instruction
    tty->print("# latency_to_inputs: node_latency[%d] = %d for node",

    // Walk backwards thru projections
    // If the defining block is not known, assume it is ok
      tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",

//------------------------------latency_from_use-------------------------------
// Compute the latency of a specific use
  // If self-reference, return no latency
  // If the use is not a projection, then it is simple...
    // Change this if we want local latencies
      tty->print_cr("# %d + edge_latency(%d) == %d -> %d, latency = %d",

    // This is a projection, just grab the latency of the use(s)

//------------------------------latency_from_uses------------------------------
// Compute the latency of this instruction relative to all of its uses.
// This computes a number that increases as we approach the beginning of the
// routine.
  // Set the latency for this instruction
    tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
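// Illustrative sketch of the backwards latency propagation described above: a
// node's latency is the maximum, over all of its uses, of the use's latency
// plus the latency of the connecting edge, so values grow toward the start of
// the routine.  Types and edge_latency() are hypothetical simplifications.
namespace sketch_latency_from_uses {
  struct SNode { SNode** outs; unsigned nouts; unsigned latency; };

  // Hypothetical per-edge cost; the real code derives it from the matched
  // machine instruction.
  unsigned edge_latency(const SNode*, const SNode*) { return 1; }

  void set_latency_from_uses(SNode* n) {
    unsigned latency = 0;
    for (unsigned i = 0; i < n->nouts; i++) {       // for all uses of n
      const SNode* use = n->outs[i];
      unsigned l = use->latency + edge_latency(n, use);
      if (l > latency)  latency = l;                // keep the slowest path
    }
    n->latency = latency;                           // grows toward method entry
  }
}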
//------------------------------hoist_to_cheaper_block-------------------------
// Pick a block for node self, between early and LCA, that is a cheaper
// alternative to LCA.
  // Turn off latency scheduling if scheduling is just plain off
  // Do not hoist (to cover latency) instructions which target a
  // single register.  Hoisting stretches the live range of the
  // single register and may force spilling.
    tty->print("# Find cheaper block for latency %d: ",
    tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",

  // Walk up the dominator tree from LCA (Lowest common ancestor) to
  // the earliest legal location.  Capture the least execution frequency.
    // Don't hoist machine instructions to the root basic block
      tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",

    // because they may end up above other uses of their phi forcing
    // their result register to be different from their input.
    tty->print_cr("# Choose block B%d with start latency=%d and freq=%g",

  // See if the latency needs to be updated
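// Illustrative sketch of the frequency-driven hoisting walk described above:
// starting at the LCA, walk up the dominator tree as far as the earliest legal
// block and remember the block with the lowest execution frequency.  The
// latency test, the root-block exclusion and the "single register target"
// cases mentioned above are omitted; the type is hypothetical and 'early' is
// assumed to dominate 'LCA'.
namespace sketch_hoist_to_cheaper_block {
  struct SBlock { SBlock* idom; double freq; };

  SBlock* hoist_to_cheaper_block(SBlock* LCA, SBlock* early) {
    SBlock* least      = LCA;
    double  least_freq = LCA->freq;
    for (SBlock* b = LCA; ; b = b->idom) {     // walk from LCA up to early
      if (b->freq < least_freq) {
        least      = b;                        // cheaper block found
        least_freq = b->freq;
      }
      if (b == early)  break;                  // earliest legal location reached
    }
    return least;
  }
}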
//------------------------------schedule_late-----------------------------------
// Now schedule all codes as LATE as possible.  This is the LCA in the
// dominator tree of all USES of a value.  Pick the block with the least
// loop nesting depth that is lowest in the dominator tree.
    tty->print("\n#---- schedule_late ----\n");

  // Walk over all the nodes from last to first
    // Top node goes in bb #2 with other constants.
    // It must be special-cased, because it has no out edges.

    // No uses, just terminate
      continue; // Must be a dead machine projection

    // If node is pinned in the block, then no scheduling can be done.
      // Don't move exception creation
      // Don't move CheckCastPP nodes away from their input, if the input
      // is a rawptr (5071820).

    // Gather LCA of all uses
      // For all uses, find LCA
    } // (Hide defs of imax, i from rest of block.)

    // Place temps in the block of their use.  This isn't a
    // requirement for correctness but it reduces useless
    // interference between temps and other nodes.

    // Check if 'self' could be anti-dependent on memory
      // Hoist LCA above possible-defs and insert anti-dependences to
      // defs in new LCA block.

      // Somehow the LCA has moved above the earliest legal point.
      // (One way this can happen is via memory_early_block.)
        // Retry with subsume_loads == false
        // If this is the first failure, the sentinel string will "stick"
        // to the Compile object, and the C2Compiler will see it and retry.
        // Bailout without retry when (early->_dom_depth > LCA->_dom_depth)

    // If there is no opportunity to hoist, then we're done.

    // Must-clone guys stay next to use; no hoisting allowed.
    // Also cannot hoist guys that alter memory or are otherwise not
    // allocatable (hoisting can make a value live longer, leading to
    // anti and output dependency problems which are normally resolved
    // by the register allocator giving everyone a different register).

      // Now find the block with the least execution frequency.
      // Start at the latest schedule and work up to the earliest schedule
      // in the dominator tree.  Thus the Node will dominate all its uses.
      // Just use the LCA of the uses.

    // Put the node into target block

    // Since precedence edges are only inserted when we're sure they
    // are needed, make sure that after placement in a block we don't
    // need any new precedence edges.
  } // Loop until all nodes have been visited
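// Illustrative sketch of the latest-placement computation at the heart of
// schedule_late: the LCA, in the dominator tree, of the blocks of all uses of
// a value.  The Phi refinement (using the predecessor block that feeds the
// matching input instead of the Phi's own block) is shown in the
// raise_LCA_above_use sketch earlier and omitted here.  Types are hypothetical.
namespace sketch_schedule_late_lca {
  struct SBlock { SBlock* idom; unsigned dom_depth; };
  struct SNode  { SNode** outs; unsigned nouts; SBlock* block; };

  SBlock* dom_lca(SBlock* a, SBlock* b) {      // same walk as the dom_lca sketch
    if (a == 0)  return b;
    if (b == 0 || b == a)  return a;
    while (a->dom_depth > b->dom_depth)  a = a->idom;
    while (b->dom_depth > a->dom_depth)  b = b->idom;
    while (a != b) { a = a->idom; b = b->idom; }
    return a;
  }

  SBlock* latest_legal_block(SNode* def) {
    SBlock* LCA = 0;                           // NULL means "no constraint yet"
    for (unsigned i = 0; i < def->nouts; i++)  // for all uses, find LCA
      LCA = dom_lca(LCA, def->outs[i]->block);
    return LCA;                                // deepest block dominating every use
  }
}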
//------------------------------GlobalCodeMotion-------------------------------
    tty->print("\n---- Start GlobalCodeMotion ----\n");

  // Initialize the bbs.map for things on the proj_list

  // Set the basic block for Nodes pinned into blocks

  // Find the earliest Block any instruction can be placed in.  Some
  // instructions are pinned into Blocks.  Unpinned instructions can
  // appear in the last block in which all their inputs occur.

  // Compute the latency information (via backwards walk) for all the
  // instructions in the graph

  // Now schedule all codes as LATE as possible.  This is the LCA in the
  // dominator tree of all USES of a value.  Pick the block with the least
  // loop nesting depth that is lowest in the dominator tree.
  // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )

    // schedule_late fails only when graph is incorrect.

    tty->print("\n---- Detect implicit null checks ----\n");

  // Detect implicit-null-check opportunities.  Basically, find NULL checks
  // with suitable memory ops nearby.  Use the memory op to do the NULL check.
  // I can generate a memory op if there is not one nearby.
    // Don't do it for natives, adapters, or runtime stubs
    // ...and don't do it when there have been too many traps, globally.

    // By reversing the loop direction we get a very minor gain on mpegaudio.
    // Feel free to revert to a forward loop for clarity.
      // The implicit_null_check will only perform the transformation
      // if the null branch is truly uncommon, *and* it leads to an
      // uncommon trap.  Combined with the too_many_traps guards
      // above, this prevents SEGV storms reported in 6366351,
      // by recompiling offending methods without this optimization.

    tty->print("\n---- Start Local Scheduling ----\n");

  // Schedule locally.  Right now a simple topological sort.
  // Later, do a real latency aware scheduler.

  // If we inserted any instructions between a Call and its CatchNode,
  // clone the instructions on all paths below the Catch.

    tty->print("\n---- After GlobalCodeMotion ----\n");
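// Illustrative sketch of the phase order that global code motion follows, as
// described by the comments above.  Every name below is a hypothetical
// stand-in for the corresponding pass, not the real PhaseCFG interface.
namespace sketch_global_code_motion {
  struct Cfg;   // hypothetical whole-CFG context

  void schedule_pinned_nodes(Cfg&)        { /* pin Region/Phi/Start/... into blocks    */ }
  void schedule_early(Cfg&)               { /* earliest legal block per node           */ }
  void compute_latencies_backwards(Cfg&)  { /* backwards latency walk                  */ }
  void schedule_late(Cfg&)                { /* LCA of uses, hoist to cheaper block     */ }
  void detect_implicit_null_checks(Cfg&)  { /* fold NULL checks into nearby memory ops */ }
  void local_schedule(Cfg&)               { /* per-block scheduling (topological sort) */ }
  void clone_below_catches(Cfg&)          { /* fix nodes between a Call and its Catch  */ }

  void global_code_motion(Cfg& cfg) {
    schedule_pinned_nodes(cfg);
    schedule_early(cfg);
    compute_latencies_backwards(cfg);
    schedule_late(cfg);
    detect_implicit_null_checks(cfg);
    local_schedule(cfg);
    clone_below_catches(cfg);
  }
}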
//------------------------------Estimate_Block_Frequency-----------------------
// Estimate block frequencies based on IfNode probabilities.
  // Force conditional branches leading to uncommon traps to be unlikely,
  // not because we get to the uncommon_trap with less relative frequency,
  // but because an uncommon_trap typically causes a deopt, so we only get
  // here once.

  // Create the loop tree and calculate loop depth.

  // Compute block frequency of each block, relative to a single loop entry.

  // Adjust all frequencies to be relative to a single method entry

  // force paths ending at uncommon traps to be infrequent

//----------------------------create_loop_tree--------------------------------
// Create a loop tree from the CFG
  // Check that _loop fields are clear...we could clear them if not.

  // Sanity check that the RPO numbering is reflected in the _blocks array.
  // It doesn't have to be for the loop tree to be built, but if it is not,
  // then the blocks have been reordered since dom graph building...which
  // may question the RPO numbering
    assert(b->_rpo == i, "unexpected reverse post order number");

  // Assign blocks to loops
      // Defensively filter out Loop nodes for non-single-entry loops.
      // For all reasonable loops, the head occurs before the tail in RPO.

      // The tail and (recursive) predecessors of the tail
      // are made members of a new loop.

      // Add to nloop so push_pred() will skip over inner loops

  // Create a member list for each loop consisting
  // of both blocks and (immediate child) loops.
      // Not assigned to a loop.  Add it to the method's pseudo loop.
        // Not a nested loop.  Make it a child of the method's pseudo loop.
        // Add nested loop to member list of parent loop.

//------------------------------push_pred--------------------------------------
    // Filter out blocks for non-single-entry loops.
    // For all reasonable loops, the head occurs before the tail in RPO.
      // Make pred's loop be a child
      // Continue with loop entry predecessor.

//------------------------------add_nested_loop--------------------------------
// Make cl a child of the current loop in the loop tree.
  assert(cl != this, "not my own parent");

//------------------------------compute_loop_depth-----------------------------
// Store the loop depth in each CFGLoop object.
// Recursively walk the children to do the same for them.

//------------------------------compute_freq-----------------------------------
// Compute the frequency of each block and loop, relative to a single entry
// into the dominating loop head.
  // Bottom up traversal of loop tree (visit inner loops first.)

  // Set loop head frequency to 1.0, then transitively
  // compute frequency for all successors in the loop,
  // as well as for each exit edge.  Inner loops are
  // treated as single blocks with loop exit targets
  // as the successor blocks.

  // For all loops other than the outer, "method" loop,
  // sum and normalize the exit probability.  The "method" loop
  // should keep the initial exit probability of 1, so that
  // inner blocks do not get erroneously scaled.

    // Total the exit probabilities for this loop.

    // Normalize the exit probabilities.  Until now, the
    // probabilities estimate the possibility of exit per
    // a single loop iteration; afterward, they estimate
    // the probability of exit per loop entry.

    // Save the total, but guard against unreasonable probability,
    // as the value is used to estimate the loop trip count.
    // An infinite trip count would blur relative block frequencies.

//------------------------------succ_prob-------------------------------------
// Determine the probability of reaching successor 'i' from the receiver block.
    // Can only reach here if called after lcm.  The original Op_If is gone,
    // so we attempt to infer the probability from one or both of the
    // successor blocks.

    // If either successor has only one predecessor, then the
    // probability estimate can be derived using the
    // relative frequency of the successor and this block.

    // Estimate using both successor frequencies
    assert (i < 2, "just checking");

  // Conditionals pass on only part of their frequency
    // If succ[i] is the FALSE branch, invert path info
      return 1.0f - prob;   // not taken
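// Illustrative sketch of the frequency-based inference described above for a
// two-way branch whose original If node is gone: if one arm has this block as
// its only predecessor, its relative frequency estimates the probability of
// taking that arm, and the other arm gets the complement.  Types are
// hypothetical and the clamping/fallback cases of the real code are omitted.
namespace sketch_succ_prob {
  struct SBlock { double freq; unsigned num_preds; };

  // Probability of reaching 'target' from 'b', where 'other' is the opposite
  // arm of the two-way branch.
  float succ_prob(const SBlock* b, const SBlock* target, const SBlock* other) {
    if (b->freq <= 0.0)  return 0.5f;                 // no information: even split
    if (target->num_preds == 1)                       // estimate directly from target
      return (float)(target->freq / b->freq);
    if (other->num_preds == 1)                        // estimate the other arm, invert
      return 1.0f - (float)(other->freq / b->freq);   // not taken
    return 0.5f;                                      // fall back to an even split
  }
}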
  // Divide the frequency between all successors evenly
    // Fall-thru path gets the lion's share.
    // Presume exceptional paths are equally unlikely

  // Pass frequency straight thru to target

  // Do not push out freq to root block

//------------------------------num_fall_throughs-----------------------------
// Return the number of fall-through candidates for a block
    // In theory, either side can fall-thru, for simplicity's sake,
    // let's say only the false branch can now.

//------------------------------succ_fall_through-----------------------------
// Return true if a specific successor could be fall-through target.
    // In theory, either side can fall-thru, for simplicity's sake,
    // let's say only the false branch can now.

//------------------------------update_uncommon_branch------------------------
// Update the probability of a two-branch to be uncommon
  // Which successor is ub?
  assert(s < 2, "uncommon successor must be found");

  // If ub is the true path, make the probability small, else
  // ub is the false path, and make the probability large
  // Get existing probability

//------------------------------update_succ_freq-------------------------------
// Update the appropriate frequency associated with block 'b', a successor of
// a block in this loop.
      // back branch within the loop
      // Do nothing now, the loop carried frequency will be
      // adjusted later in scale_freq().

      // simple branch within the loop

    // branch is exit from this loop

    // branch into nested loop

//------------------------------in_loop_nest-----------------------------------
// Determine if block b is in the receiver's loop nest.

//------------------------------scale_freq-------------------------------------
// Scale frequency of loops and blocks by trip counts from outer loops
// Do a top down traversal of loop tree (visit outer loops first.)

//------------------------------dump_tree--------------------------------------

//------------------------------dump-------------------------------------------
  tty->print("%s: %d trip_count: %6.0f freq: %6.0f\n",
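// Illustrative sketch of the top-down frequency scaling that scale_freq
// describes: block frequencies were computed relative to a single entry of
// their own loop, so multiplying by the accumulated trip counts of the
// enclosing loops (outer loops first) converts them to method-relative
// values.  Types are hypothetical simplifications; the method's pseudo loop
// is assumed to have a trip count of 1.
#include <vector>

namespace sketch_scale_freq {
  struct SBlock { double freq; };
  struct SLoop  {
    double trip_count;                 // estimated iterations per loop entry
    std::vector<SBlock*> blocks;       // blocks owned directly by this loop
    std::vector<SLoop*>  children;     // nested loops
  };

  // Visit outer loops first, carrying the product of enclosing trip counts.
  void scale_freq(SLoop* loop, double outer_scale = 1.0) {
    double scale = outer_scale * loop->trip_count;
    for (size_t i = 0; i < loop->blocks.size(); i++)
      loop->blocks[i]->freq *= scale;          // make the frequency method-relative
    for (size_t i = 0; i < loop->children.size(); i++)
      scale_freq(loop->children[i], scale);    // recurse into nested loops
  }
}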