superword.cpp revision 221
/*
* Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
#include "incls/_precompiled.incl"
#include "incls/_superword.cpp.incl"
//
// S U P E R W O R D T R A N S F O R M
//=============================================================================
//------------------------------SuperWord---------------------------
{}
//------------------------------transform_loop---------------------------
// Check for no control flow in body (other than exit)
// Make sure the are no extra control users of the loop backedge
return;
}
// Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
// Do vectors exist on this architecture?
if (vector_width_in_bytes() == 0) return;
init(); // initialize data structures
// For now, define one block which is the entire loop body
SLP_extract();
}
//------------------------------SLP_extract---------------------------
// Extract the superword level parallelism
//
// 1) A reverse post-order of nodes in the block is constructed. By scanning
// this list from first to last, all definitions are visited before their uses.
//
// 2) A point-to-point dependence graph is constructed between memory references.
// This simplies the upcoming "independence" checker.
//
// 3) The maximum depth in the node graph from the beginning of the block
// to each node is computed. This is used to prune the graph search
// in the independence checker.
//
// 4) For integer types, the necessary bit width is propagated backwards
// from stores to allow packed operations on byte, char, and short
// integers. This reverses the promotion to type "int" that javac
// did for operations like: char c1,c2,c3; c1 = c2 + c3.
//
// 5) One of the memory references is picked to be an aligned vector reference.
// The pre-loop trip count is adjusted to align this reference in the
// unrolled body.
//
// 6) The initial set of pack pairs is seeded with memory references.
//
// 7) The set of pack pairs is extended by following use->def and def->use links.
//
// 8) The pairs are combined into vector sized packs.
//
// 9) Reorder the memory slices to co-locate members of the memory packs.
//
// 10) Generate ideal vector nodes for the final set of packs and where necessary,
// inserting scalar promotion, vector creation from multiple scalars, and
// extraction of scalar values from vectors.
//
void SuperWord::SLP_extract() {
// Ready the block
construct_bb();
// Attempt vectorization
filter_packs();
schedule();
output();
}
//------------------------------find_adjacent_refs---------------------------
// Find the adjacent memory references and create pack pairs for them.
// This is the initial set of packs that will then be extended by
// following use->def and def->use links. The align positions are
// assigned relative to the reference "align_to_ref"
void SuperWord::find_adjacent_refs() {
// Get list of memory operations
if (align != bottom_align) {
}
}
}
// Find a memory reference to align to. The pre-loop trip count
// is modified to align this reference to a vector-aligned address
if (align_to_ref() == NULL) return;
int vw = vector_width_in_bytes();
#ifndef PRODUCT
if (TraceSuperWord)
#endif
// Set alignment relative to "align_to_ref"
set_alignment(s, align);
} else {
}
}
// Create initial pack pairs of memory operations
}
}
}
}
#ifndef PRODUCT
if (TraceSuperWord) {
}
#endif
}
//------------------------------find_align_to_ref---------------------------
// Find a memory reference to align the loop induction variable to.
// Looks first at stores then at loads, looking for a memory reference
// with the largest number of references similar to it.
// Count number of comparable memory ops
// Discard if pre loop can't align this reference
if (!ref_is_alignable(p1)) {
continue;
}
}
}
}
}
// Find Store (or Load) with the greatest number of "comparable" references
int max_ct = 0;
int max_idx = -1;
int min_iv_offset = max_jint;
if (s->is_Store()) {
SWPointer p(s, this);
p.offset_in_bytes() < min_iv_offset)) {
max_idx = j;
min_iv_offset = p.offset_in_bytes();
}
}
}
// If no stores, look at loads
if (max_ct == 0) {
if (s->is_Load()) {
SWPointer p(s, this);
p.offset_in_bytes() < min_iv_offset)) {
max_idx = j;
min_iv_offset = p.offset_in_bytes();
}
}
}
}
if (max_ct > 0)
#ifndef PRODUCT
if (TraceSuperWord && Verbose) {
s->dump();
}
}
#endif
}
//------------------------------ref_is_alignable---------------------------
// Can the preloop align the reference to position zero in the vector?
if (!p.has_iv()) {
return true; // no induction variable
}
// Stride one accesses are alignable.
return true;
// If initial offset from start of object is computable,
// compute alignment within the vector.
int vw = vector_width_in_bytes();
if (span > 0) {
} else {
}
}
}
return false;
}
//---------------------------dependence_graph---------------------------
// Construct dependency graph.
// A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
void SuperWord::dependence_graph() {
// First, assign a dependence node to each memory node
}
}
// For each memory slice, create the dependences
for (int i = 0; i < _mem_slice_head.length(); i++) {
// Get slice in predecessor order (last is first)
// Make the slice dependent on the root
// Create a sink for the slice
// Now visit each pair of memory ops, creating the edges
// If no dependency yet, use slice
}
bool sink_dependent = true;
for (int k = j - 1; k >= 0; k--) {
continue;
if (SuperWordRTDepCheck &&
// Create a runtime check to disambiguate
// Possibly same address
sink_dependent = false;
}
}
if (sink_dependent) {
}
}
#ifndef PRODUCT
if (TraceSuperWord) {
}
}
#endif
}
#ifndef PRODUCT
if (TraceSuperWord) {
for (int r = 0; r < _disjoint_ptrs.length(); r++) {
}
}
#endif
}
//---------------------------mem_slice_preds---------------------------
// Return a memory slice (node list) in predecessor order starting at "start"
while (true) {
}
} else {
// FIXME
// Either unrolling is causing a memory edge not to disappear,
// or need to run igvn.optimize() again before SLP
// Ditto. Not sure what else to check further.
// StoreCM has an input edge used as a precedence edge.
// Maybe an issue when oop stores are vectorized.
} else {
}
}
}
if (n == stop) break;
prev = n;
}
}
//------------------------------stmts_can_pack---------------------------
// Can s1 and s2 be in a pack with s1 immediately preceeding s2 and
// s1 aligned at "align"
return true;
}
}
}
}
}
}
return false;
}
//------------------------------exists_at---------------------------
// Does s exist in a pack at position pos?
return true;
}
}
return false;
}
//------------------------------are_adjacent_refs---------------------------
// Is s1 immediately before s2 in memory?
// FIXME - co_locate_pack fails on Stores in different mem-slices, so
// only pack memops that are in the same alias set until that's fixed.
return false;
}
//------------------------------isomorphic---------------------------
// Are s1 and s2 similar?
return true;
}
//------------------------------independent---------------------------
// Is there no data path from s1 to s2 or s2 to s1?
// assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
}
//------------------------------independent_path------------------------------
// Helper for independent
return false;
}
return false;
}
}
}
return true;
}
//------------------------------set_alignment---------------------------
}
//------------------------------data_size---------------------------
return bsize;
}
//------------------------------extend_packlist---------------------------
// Extend packset by following use->def and def->use links from pack members.
void SuperWord::extend_packlist() {
bool changed;
do {
changed = false;
changed |= follow_use_defs(p);
changed |= follow_def_uses(p);
}
} while (changed);
#ifndef PRODUCT
if (TraceSuperWord) {
}
#endif
}
//------------------------------follow_use_defs---------------------------
// Extend the packset by visiting operand definitions of nodes in pack p
bool changed = false;
continue;
changed = true;
}
}
}
return changed;
}
//------------------------------follow_def_uses---------------------------
// Extend the packset by visiting uses of nodes in pack p
bool changed = false;
int savings = -1;
continue;
if (my_savings > savings) {
}
}
}
}
if (savings >= 0) {
changed = true;
}
return changed;
}
//---------------------------opnd_positions_match-------------------------
// Is the use of d1 in u1 at the same operand position as d2 in u2?
do {
return false;
}
return true;
}
//------------------------------est_savings---------------------------
// Estimate the savings from executing s1 and s2 as a pack
// inputs
} else {
}
}
}
// uses of result
ct++;
}
}
}
}
}
}
return save;
}
//------------------------------costs---------------------------
//------------------------------combine_packs---------------------------
// Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
void SuperWord::combine_packs() {
bool changed;
do {
changed = false;
}
changed = true;
}
}
}
} while (changed);
}
}
#ifndef PRODUCT
if (TraceSuperWord) {
}
#endif
}
//-----------------------------construct_my_pack_map--------------------------
// Construct the map from nodes to packs. Only valid after the
// point where a node is only in one pack (after combine_packs).
void SuperWord::construct_my_pack_map() {
set_my_pack(s, p);
}
}
}
//------------------------------filter_packs---------------------------
// Remove packs that are not implemented or not profitable.
void SuperWord::filter_packs() {
// Remove packs that are not implemented
if (!impl) {
#ifndef PRODUCT
if (TraceSuperWord && Verbose) {
}
#endif
remove_pack_at(i);
}
}
// Remove packs that are not profitable
bool changed;
do {
changed = false;
if (!prof) {
#ifndef PRODUCT
if (TraceSuperWord && Verbose) {
}
#endif
remove_pack_at(i);
changed = true;
}
}
} while (changed);
#ifndef PRODUCT
if (TraceSuperWord) {
}
#endif
}
//------------------------------implemented---------------------------
// Can code be generated for pack p?
}
//------------------------------profitable---------------------------
// For pack p, are all operands and all uses (with in the block) vector?
// Return false if some input is not vector and inside block
if (!is_vector_use(p0, i)) {
// For now, return false if not scalar promotion case (inputs are the same.)
// Later, implement PackNode and allow differring, non-vector inputs
// (maybe just the ones from outside the block.)
return false;
}
}
}
// For now, return false if not all uses are vector.
// Later, implement ExtractNode and allow non-vector uses (maybe
// just the ones outside the block.)
if (def == n) {
if (!is_vector_use(use, k)) {
return false;
}
}
}
}
}
}
return true;
}
//------------------------------schedule---------------------------
// Adjust the memory graph for the packed operations
// Co-locate in the memory graph the members of each memory pack
}
}
//------------------------------co_locate_pack---------------------------
// Within a pack, move stores down to the last executed store,
// and move loads up to the first executed load.
// Push Stores down towards last executed pack member
while (true) {
// Forward users of my memory state to my input memory state
--i; // deleted this edge; rescan position
}
}
// put current immediately before insert_pt
}
}
// Pull Loads up towards first executed pack member
// Give each load same memory state as first
}
}
}
//------------------------------output---------------------------
// Convert packs into vector node operations
// MUST ENSURE main loop's initial value is properly aligned:
// (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
// Insert extract (unpack) operations for scalar uses
}
if (p && n == executed_last(p)) {
if (n->is_Load()) {
} else if (n->is_Store()) {
// Promote value to be stored to vector
} else if (n->req() == 3) {
// Promote operands to vector
} else {
}
}
}
}
}
//------------------------------vector_opd---------------------------
// Create a vector operand for the nodes in pack p for operand: in(opd_idx)
bool same_opd = true;
same_opd = false;
break;
}
}
if (same_opd) {
}
// Convert scalar input to vector. Use p0's type because it's container
// maybe smaller than the operand's container.
return vn;
}
// Insert pack operation
}
return pk;
}
//------------------------------insert_extracts---------------------------
// If a use of pack p is not a vector use, then replace the
// use with an extract operation.
// Inspect each use of each pack member. For each use that is
// not a vector use, replace the use with an extract operation.
if (def == n) {
if (!is_vector_use(use, k)) {
}
}
}
}
}
while (_n_idx_list.is_nonempty()) {
_n_idx_list.pop();
// Insert extract operation
}
}
//------------------------------is_vector_use---------------------------
// Is use->in(u_idx) a vector use?
// check for scalar promotion
}
return true;
}
return false;
return false;
}
return true;
}
//------------------------------construct_bb---------------------------
// Construct reverse postorder list of block members
void SuperWord::construct_bb() {
// Find non-control nodes with no inputs from within block,
// create a temporary map from node _idx to bb_idx for use
// by the visited and post_visited sets,
// and count number of nodes in block.
int bb_ct = 0;
set_bb_idx(n, i); // Create a temporary map
if (in_bb(n)) {
bb_ct++;
if (!n->is_CFG()) {
bool found = false;
found = true;
break;
}
}
if (!found) {
_data_entry.push(n);
}
}
}
}
// Find memory slices (head and tail)
_mem_slice_head.push(n);
}
}
// Create an RPO list of nodes in block
// Push all non-control nodes with no inputs from within block, then control entry
for (int j = 0; j < _data_entry.length(); j++) {
visited_set(n);
}
// Do a depth first walk over out edges
int size;
if (!visited_test_set(n)) {
// forward arc in graph
} else if (!post_visited_test(n)) {
// cross or back arc
// Don't go around backedge
}
}
// There were no additional uses, post visit node now
rpo_idx--;
post_visited_set(n);
}
} else {
}
}
// Create real map of block indices for nodes
set_bb_idx(n, j);
}
initialize_bb(); // Ensure extra info is allocated.
#ifndef PRODUCT
if (TraceSuperWord) {
print_bb();
for (int m = 0; m < _data_entry.length(); m++) {
}
for (int m = 0; m < _mem_slice_head.length(); m++) {
}
}
#endif
}
//------------------------------initialize_bb---------------------------
// Initialize per node info
void SuperWord::initialize_bb() {
}
//------------------------------bb_insert_after---------------------------
// Insert n into block after pos
// Make room
}
}
// Set value
// Adjust map from node->_idx to _block index
}
}
//------------------------------compute_max_depth---------------------------
// Compute max depth for expressions from beginning of block
// Use to prune search paths during test for independence.
void SuperWord::compute_max_depth() {
int ct = 0;
bool again;
do {
again = false;
if (!n->is_Phi()) {
int d_in = 0;
}
}
again = true;
}
}
}
ct++;
} while (again);
#ifndef PRODUCT
if (TraceSuperWord && Verbose)
#endif
}
//-------------------------compute_vector_element_type-----------------------
// Compute necessary vector element type for expressions
// This propagates backwards a narrower integer type when the
// upper bits of the value are not needed.
// Example: char a,b,c; a = b + c;
// Normally the type of the add is integer, but for packed character
// operations the type of the add needs to be char.
void SuperWord::compute_vector_element_type() {
#ifndef PRODUCT
if (TraceSuperWord && Verbose)
#endif
// Initial type
set_velt_type(n, vt);
}
// Propagate narrowed type backwards through operations
// that don't depend on higher order bits
// Only integer types need be examined
if (n->bottom_type()->isa_int()) {
// Don't propagate through a type conversion
continue;
case Op_LShiftI: case Op_LShiftL:
bool same_type = true;
same_type = false;
break;
}
}
if (same_type) {
}
}
}
}
}
}
#ifndef PRODUCT
if (TraceSuperWord && Verbose) {
n->dump();
}
}
#endif
}
//------------------------------memory_alignment---------------------------
// Alignment within a vector memory reference
SWPointer p(s, this);
if (!p.valid()) {
return bottom_align;
}
int offset = p.offset_in_bytes();
return off_mod;
}
//---------------------------container_type---------------------------
// Smallest type containing range of values
}
if (t->basic_type() == T_INT) {
}
return t;
}
//-------------------------vector_opd_range-----------------------
// (Start, end] half-open range defining which operands are vector
switch (n->Opcode()) {
case Op_LoadP:
*start = 0;
*end = 0;
return;
case Op_StoreP:
return;
case Op_LShiftI: case Op_LShiftL:
*start = 1;
*end = 2;
return;
*start = 2;
return;
}
*start = 1;
}
//------------------------------in_packset---------------------------
// Are s1 and s2 in a pack pair and ordered as s1,s2?
return true;
}
}
return false;
}
//------------------------------in_pack---------------------------
// Is s in pack p?
if (p->at(i) == s) {
return p;
}
}
return NULL;
}
//------------------------------remove_pack_at---------------------------
// Remove the pack at position pos in the packset
set_my_pack(s, NULL);
}
}
//------------------------------executed_first---------------------------
// Return the node executed first in pack p. Uses the RPO block list
// to determine order.
n = s;
}
}
return n;
}
//------------------------------executed_last---------------------------
// Return the node executed last in pack p.
n = s;
}
}
return n;
}
//----------------------------align_initial_loop_index---------------------------
// to align_to_ref will be a position zero in the vector.
// (iv + k) mod vector_align == 0
// Where we put new limit calculations
// Ensure the original loop limit is available from the
// pre-loop Opaque1 node.
// Given:
// lim0 == original pre loop limit
// V == v_align (power of 2)
// invar == extra invariant piece of the address expression
// e == k [ +/- invar ]
//
// When reassociating expressions involving '%' the basic rules are:
// (a - b) % k == 0 => a % k == b % k
// and:
// (a + b) % k == 0 => a % k == (k - b) % k
//
// For stride > 0 && scale > 0,
// Derive the new pre-loop limit "lim" such that the two constraints:
// (1) lim = lim0 + N (where N is some positive integer < V)
// (2) (e + lim) % V == 0
// are true.
//
// Substituting (1) into (2),
// (e + lim0 + N) % V == 0
// solve for N:
// N = (V - (e + lim0)) % V
// substitute back into (1), so that new limit
// lim = lim0 + (V - (e + lim0)) % V
//
// For stride > 0 && scale < 0
// Constraints:
// lim = lim0 + N
// (e - lim) % V == 0
// Solving for lim:
// (e - lim0 - N) % V == 0
// N = (e - lim0) % V
// lim = lim0 + (e - lim0) % V
//
// For stride < 0 && scale > 0
// Constraints:
// lim = lim0 - N
// (e + lim) % V == 0
// Solving for lim:
// (e + lim0 - N) % V == 0
// N = (e + lim0) % V
// lim = lim0 - (e + lim0) % V
//
// For stride < 0 && scale < 0
// Constraints:
// lim = lim0 - N
// (e - lim) % V == 0
// Solving for lim:
// (e - lim0 + N) % V == 0
// N = (V - (e - lim0)) % V
// lim = lim0 - (V - (e - lim0)) % V
// incorporate any extra invariant piece producing k +/- invar >>> log2(elt)
if (align_to_ref_p.negate_invar()) {
} else {
}
}
// compute e +/- lim0
if (scale < 0) {
} else {
}
// compute V - (e +/- lim0)
}
// compute N = (exp) % V
// substitute back into (1), so that new limit
// lim = lim0 + N
if (stride < 0) {
} else {
}
Node* constrained =
}
//----------------------------get_pre_loop_end---------------------------
// Find pre loop end from main loop. Returns null if none.
return pre_end;
}
//------------------------------init---------------------------
_data_entry.clear();
_node_info.clear();
}
//------------------------------print_packset---------------------------
void SuperWord::print_packset() {
#ifndef PRODUCT
print_pack(p);
}
#endif
}
//------------------------------print_pack---------------------------
print_stmt(p->at(i));
}
}
//------------------------------print_bb---------------------------
#ifndef PRODUCT
if (n) {
n->dump();
}
}
#endif
}
//------------------------------print_stmt---------------------------
#ifndef PRODUCT
s->dump();
#endif
}
//------------------------------blank---------------------------
static char blanks[101];
return blanks;
}
//==============================SWPointer===========================
//----------------------------SWPointer------------------------
return;
}
// Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
for (int i = 0; i < 3; i++) {
return;
}
break; // stop looking at addp's
}
}
}
// Following is used to create a temporary object during
// the pattern match of an address expression.
//------------------------scaled_iv_plus_offset--------------------
// Match: k*iv + offset
// where: k is a constant that maybe zero, and
// offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional
if (scaled_iv(n)) {
return true;
}
if (offset_plus_k(n)) {
return true;
}
return true;
}
return true;
}
return true;
}
_scale *= -1;
return true;
}
}
return false;
}
//----------------------------scaled_iv------------------------
// Match: k*iv where k is a constant that's not zero
if (_scale != 0) {
return false; // already found a scale
}
if (n == iv()) {
_scale = 1;
return true;
}
return true;
return true;
}
} else if (opc == Op_LShiftI) {
return true;
}
} else if (opc == Op_ConvI2L) {
return true;
}
} else if (opc == Op_LShiftL) {
// Need to preserve the current _offset value, so
// create a temporary object for this expression subtree.
// Hacky, so should re-engineer the address pattern match.
return true;
}
}
}
}
return false;
}
//----------------------------offset_plus_k------------------------
// Match: offset is (k [+/- invariant])
// where k maybe zero and invariant is optional, but not both.
return true;
// Okay if value fits into an int
const TypeLong* t = n->find_long_type();
return true;
}
return false;
}
return true;
return true;
}
}
return true;
_negate_invar = !negate;
return true;
}
}
if (invariant(n)) {
_invar = n;
return true;
}
return false;
}
//----------------------------print------------------------
#ifndef PRODUCT
#endif
}
// ========================= OrderedPair =====================
// ========================= SWNodeInfo =====================
// ============================ DepGraph ===========================
//------------------------------make_node---------------------------
// Make a new dependence graph node for an ideal node.
}
return m;
}
//------------------------------make_edge---------------------------
// Make a new dependence graph edge from dpred -> dsucc
dpred->set_out_head(e);
dsucc->set_in_head(e);
return e;
}
// ========================== DepMem ========================
//------------------------------in_cnt---------------------------
int ct = 0;
return ct;
}
//------------------------------out_cnt---------------------------
int ct = 0;
return ct;
}
//------------------------------print-----------------------------
#ifndef PRODUCT
}
}
#endif
}
// =========================== DepEdge =========================
//------------------------------DepPreds---------------------------
#ifndef PRODUCT
#endif
}
// =========================== DepPreds =========================
// Iterator over predecessor edges in the dependence graph.
//------------------------------DepPreds---------------------------
_n = n;
_done = false;
_next_idx = 0;
_end_idx = 0;
} else {
_next_idx = 1;
}
next();
}
//------------------------------next---------------------------
} else {
_done = true;
}
}
// =========================== DepSuccs =========================
// Iterator over successor edges in the dependence graph.
//------------------------------DepSuccs---------------------------
_n = n;
_done = false;
_next_idx = 0;
_next_idx = 0;
_end_idx = 0;
} else {
_next_idx = 0;
}
next();
}
//-------------------------------next---------------------------
} else {
_done = true;
}
}