| author    | Ben Cheng <bccheng@google.com> | 2014-03-25 22:37:19 -0700 |
| committer | Ben Cheng <bccheng@google.com> | 2014-03-25 22:37:19 -0700 |
| commit    | 1bc5aee63eb72b341f506ad058502cd0361f0d10 (patch) | |
| tree      | c607e8252f3405424ff15bc2d00aa38dadbb2518 /gcc-4.9/gcc/tree-ssa-reassoc.c | |
| parent    | 283a0bf58fcf333c58a2a92c3ebbc41fb9eb1fdb (diff) | |
| download  | toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.gz, .tar.bz2, .zip | |
Initial checkin of GCC 4.9.0 from trunk (r208799).
Change-Id: I48a3c08bb98542aa215912a75f03c0890e497dba
Diffstat (limited to 'gcc-4.9/gcc/tree-ssa-reassoc.c')
-rw-r--r-- | gcc-4.9/gcc/tree-ssa-reassoc.c | 4713 |
1 file changed, 4713 insertions, 0 deletions
diff --git a/gcc-4.9/gcc/tree-ssa-reassoc.c b/gcc-4.9/gcc/tree-ssa-reassoc.c new file mode 100644 index 000000000..e9e29e550 --- /dev/null +++ b/gcc-4.9/gcc/tree-ssa-reassoc.c @@ -0,0 +1,4713 @@ +/* Reassociation for trees. + Copyright (C) 2005-2014 Free Software Foundation, Inc. + Contributed by Daniel Berlin <dan@dberlin.org> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "hash-table.h" +#include "tm.h" +#include "rtl.h" +#include "tm_p.h" +#include "tree.h" +#include "stor-layout.h" +#include "basic-block.h" +#include "gimple-pretty-print.h" +#include "tree-inline.h" +#include "pointer-set.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-fold.h" +#include "tree-eh.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimple-iterator.h" +#include "gimplify-me.h" +#include "gimple-ssa.h" +#include "tree-cfg.h" +#include "tree-phinodes.h" +#include "ssa-iterators.h" +#include "stringpool.h" +#include "tree-ssanames.h" +#include "tree-ssa-loop-niter.h" +#include "tree-ssa-loop.h" +#include "expr.h" +#include "tree-dfa.h" +#include "tree-ssa.h" +#include "tree-iterator.h" +#include "tree-pass.h" +#include "alloc-pool.h" +#include "langhooks.h" +#include "cfgloop.h" +#include "flags.h" +#include "target.h" +#include "params.h" +#include "diagnostic-core.h" + +/* This is a simple global reassociation pass. It is, in part, based + on the LLVM pass of the same name (They do some things more/less + than we do, in different orders, etc). + + It consists of five steps: + + 1. Breaking up subtract operations into addition + negate, where + it would promote the reassociation of adds. + + 2. Left linearization of the expression trees, so that (A+B)+(C+D) + becomes (((A+B)+C)+D), which is easier for us to rewrite later. + During linearization, we place the operands of the binary + expressions into a vector of operand_entry_t + + 3. Optimization of the operand lists, eliminating things like a + + -a, a & a, etc. + + 3a. Combine repeated factors with the same occurrence counts + into a __builtin_powi call that will later be optimized into + an optimal number of multiplies. + + 4. Rewrite the expression trees we linearized and optimized so + they are in proper rank order. + + 5. Repropagate negates, as nothing else will clean it up ATM. + + A bit of theory on #4, since nobody seems to write anything down + about why it makes sense to do it the way they do it: + + We could do this much nicer theoretically, but don't (for reasons + explained after how to do it theoretically nice :P). + + In order to promote the most redundancy elimination, you want + binary expressions whose operands are the same rank (or + preferably, the same value) exposed to the redundancy eliminator, + for possible elimination. 
+ + So the way to do this if we really cared, is to build the new op + tree from the leaves to the roots, merging as you go, and putting the + new op on the end of the worklist, until you are left with one + thing on the worklist. + + IE if you have to rewrite the following set of operands (listed with + rank in parentheses), with opcode PLUS_EXPR: + + a (1), b (1), c (1), d (2), e (2) + + + We start with our merge worklist empty, and the ops list with all of + those on it. + + You want to first merge all leaves of the same rank, as much as + possible. + + So first build a binary op of + + mergetmp = a + b, and put "mergetmp" on the merge worklist. + + Because there is no three operand form of PLUS_EXPR, c is not going to + be exposed to redundancy elimination as a rank 1 operand. + + So you might as well throw it on the merge worklist (you could also + consider it to now be a rank two operand, and merge it with d and e, + but in this case, you then have evicted e from a binary op. So at + least in this situation, you can't win.) + + Then build a binary op of d + e + mergetmp2 = d + e + + and put mergetmp2 on the merge worklist. + + so merge worklist = {mergetmp, c, mergetmp2} + + Continue building binary ops of these operations until you have only + one operation left on the worklist. + + So we have + + build binary op + mergetmp3 = mergetmp + c + + worklist = {mergetmp2, mergetmp3} + + mergetmp4 = mergetmp2 + mergetmp3 + + worklist = {mergetmp4} + + because we have one operation left, we can now just set the original + statement equal to the result of that operation. + + This will at least expose a + b and d + e to redundancy elimination + as binary operations. + + For extra points, you can reuse the old statements to build the + mergetmps, since you shouldn't run out. + + So why don't we do this? + + Because it's expensive, and rarely will help. Most trees we are + reassociating have 3 or less ops. If they have 2 ops, they already + will be written into a nice single binary op. If you have 3 ops, a + single simple check suffices to tell you whether the first two are of the + same rank. If so, you know to order it + + mergetmp = op1 + op2 + newstmt = mergetmp + op3 + + instead of + mergetmp = op2 + op3 + newstmt = mergetmp + op1 + + If all three are of the same rank, you can't expose them all in a + single binary operator anyway, so the above is *still* the best you + can do. + + Thus, this is what we do. When we have three ops left, we check to see + what order to put them in, and call it a day. As a nod to vector sum + reduction, we check if any of the ops are really a phi node that is a + destructive update for the associating op, and keep the destructive + update together for vector sum reduction recognition. */ + + +/* Statistics */ +static struct +{ + int linearized; + int constants_eliminated; + int ops_eliminated; + int rewritten; + int pows_encountered; + int pows_created; +} reassociate_stats; + +/* Operator, rank pair. */ +typedef struct operand_entry +{ + unsigned int rank; + int id; + tree op; + unsigned int count; +} *operand_entry_t; + +static alloc_pool operand_entry_pool; + +/* This is used to assign a unique ID to each struct operand_entry + so that qsort results are identical on different hosts. */ +static int next_operand_entry_id; + +/* Starting rank number for a given basic block, so that we can rank + operations using unmovable instructions in that BB based on the bb + depth. */ +static long *bb_rank; + +/* Operand->rank hashtable. 
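A minimal source-level sketch of the payoff described in the comment above (hypothetical example, not part of this file): once operands are rewritten in rank order, the equal-rank leaves a and b end up paired in both expressions, so a later redundancy-elimination pass can compute a + b once.

```c
#include <stdio.h>

/* Illustrative only: reassociation does not change what is computed, it
   re-parenthesizes so that the shared a + b becomes visible to CSE.  */

static int before (int a, int b, int c, int d)
{
  int x = (a + c) + b;          /* a + b is hidden by the association */
  int y = d + (b + a);
  return x ^ y;
}

static int after (int a, int b, int c, int d)
{
  int t = a + b;                /* exposed once operands are in rank order */
  int x = t + c;
  int y = t + d;
  return x ^ y;
}

int main (void)
{
  printf ("%d %d\n", before (1, 2, 3, 4), after (1, 2, 3, 4));
  return 0;
}
```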
*/ +static struct pointer_map_t *operand_rank; + +/* Forward decls. */ +static long get_rank (tree); +static bool reassoc_stmt_dominates_stmt_p (gimple, gimple); + + +/* Bias amount for loop-carried phis. We want this to be larger than + the depth of any reassociation tree we can see, but not larger than + the rank difference between two blocks. */ +#define PHI_LOOP_BIAS (1 << 15) + +/* Rank assigned to a phi statement. If STMT is a loop-carried phi of + an innermost loop, and the phi has only a single use which is inside + the loop, then the rank is the block rank of the loop latch plus an + extra bias for the loop-carried dependence. This causes expressions + calculated into an accumulator variable to be independent for each + iteration of the loop. If STMT is some other phi, the rank is the + block rank of its containing block. */ +static long +phi_rank (gimple stmt) +{ + basic_block bb = gimple_bb (stmt); + struct loop *father = bb->loop_father; + tree res; + unsigned i; + use_operand_p use; + gimple use_stmt; + + /* We only care about real loops (those with a latch). */ + if (!father->latch) + return bb_rank[bb->index]; + + /* Interesting phis must be in headers of innermost loops. */ + if (bb != father->header + || father->inner) + return bb_rank[bb->index]; + + /* Ignore virtual SSA_NAMEs. */ + res = gimple_phi_result (stmt); + if (virtual_operand_p (res)) + return bb_rank[bb->index]; + + /* The phi definition must have a single use, and that use must be + within the loop. Otherwise this isn't an accumulator pattern. */ + if (!single_imm_use (res, &use, &use_stmt) + || gimple_bb (use_stmt)->loop_father != father) + return bb_rank[bb->index]; + + /* Look for phi arguments from within the loop. If found, bias this phi. */ + for (i = 0; i < gimple_phi_num_args (stmt); i++) + { + tree arg = gimple_phi_arg_def (stmt, i); + if (TREE_CODE (arg) == SSA_NAME + && !SSA_NAME_IS_DEFAULT_DEF (arg)) + { + gimple def_stmt = SSA_NAME_DEF_STMT (arg); + if (gimple_bb (def_stmt)->loop_father == father) + return bb_rank[father->latch->index] + PHI_LOOP_BIAS; + } + } + + /* Must be an uninteresting phi. */ + return bb_rank[bb->index]; +} + +/* If EXP is an SSA_NAME defined by a PHI statement that represents a + loop-carried dependence of an innermost loop, return TRUE; else + return FALSE. */ +static bool +loop_carried_phi (tree exp) +{ + gimple phi_stmt; + long block_rank; + + if (TREE_CODE (exp) != SSA_NAME + || SSA_NAME_IS_DEFAULT_DEF (exp)) + return false; + + phi_stmt = SSA_NAME_DEF_STMT (exp); + + if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI) + return false; + + /* Non-loop-carried phis have block rank. Loop-carried phis have + an additional bias added in. If this phi doesn't have block rank, + it's biased and should not be propagated. */ + block_rank = bb_rank[gimple_bb (phi_stmt)->index]; + + if (phi_rank (phi_stmt) != block_rank) + return true; + + return false; +} + +/* Return the maximum of RANK and the rank that should be propagated + from expression OP. For most operands, this is just the rank of OP. + For loop-carried phis, the value is zero to avoid undoing the bias + in favor of the phi. */ +static long +propagate_rank (long rank, tree op) +{ + long op_rank; + + if (loop_carried_phi (op)) + return rank; + + op_rank = get_rank (op); + + return MAX (rank, op_rank); +} + +/* Look up the operand rank structure for expression E. */ + +static inline long +find_operand_rank (tree e) +{ + void **slot = pointer_map_contains (operand_rank, e); + return slot ? 
(long) (intptr_t) *slot : -1; +} + +/* Insert {E,RANK} into the operand rank hashtable. */ + +static inline void +insert_operand_rank (tree e, long rank) +{ + void **slot; + gcc_assert (rank > 0); + slot = pointer_map_insert (operand_rank, e); + gcc_assert (!*slot); + *slot = (void *) (intptr_t) rank; +} + +/* Given an expression E, return the rank of the expression. */ + +static long +get_rank (tree e) +{ + /* Constants have rank 0. */ + if (is_gimple_min_invariant (e)) + return 0; + + /* SSA_NAME's have the rank of the expression they are the result + of. + For globals and uninitialized values, the rank is 0. + For function arguments, use the pre-setup rank. + For PHI nodes, stores, asm statements, etc, we use the rank of + the BB. + For simple operations, the rank is the maximum rank of any of + its operands, or the bb_rank, whichever is less. + I make no claims that this is optimal, however, it gives good + results. */ + + /* We make an exception to the normal ranking system to break + dependences of accumulator variables in loops. Suppose we + have a simple one-block loop containing: + + x_1 = phi(x_0, x_2) + b = a + x_1 + c = b + d + x_2 = c + e + + As shown, each iteration of the calculation into x is fully + dependent upon the iteration before it. We would prefer to + see this in the form: + + x_1 = phi(x_0, x_2) + b = a + d + c = b + e + x_2 = c + x_1 + + If the loop is unrolled, the calculations of b and c from + different iterations can be interleaved. + + To obtain this result during reassociation, we bias the rank + of the phi definition x_1 upward, when it is recognized as an + accumulator pattern. The artificial rank causes it to be + added last, providing the desired independence. */ + + if (TREE_CODE (e) == SSA_NAME) + { + gimple stmt; + long rank; + int i, n; + tree op; + + if (SSA_NAME_IS_DEFAULT_DEF (e)) + return find_operand_rank (e); + + stmt = SSA_NAME_DEF_STMT (e); + if (gimple_code (stmt) == GIMPLE_PHI) + return phi_rank (stmt); + + if (!is_gimple_assign (stmt) + || gimple_vdef (stmt)) + return bb_rank[gimple_bb (stmt)->index]; + + /* If we already have a rank for this expression, use that. */ + rank = find_operand_rank (e); + if (rank != -1) + return rank; + + /* Otherwise, find the maximum rank for the operands. As an + exception, remove the bias from loop-carried phis when propagating + the rank so that dependent operations are not also biased. */ + rank = 0; + if (gimple_assign_single_p (stmt)) + { + tree rhs = gimple_assign_rhs1 (stmt); + n = TREE_OPERAND_LENGTH (rhs); + if (n == 0) + rank = propagate_rank (rank, rhs); + else + { + for (i = 0; i < n; i++) + { + op = TREE_OPERAND (rhs, i); + + if (op != NULL_TREE) + rank = propagate_rank (rank, op); + } + } + } + else + { + n = gimple_num_ops (stmt); + for (i = 1; i < n; i++) + { + op = gimple_op (stmt, i); + gcc_assert (op); + rank = propagate_rank (rank, op); + } + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Rank for "); + print_generic_expr (dump_file, e, 0); + fprintf (dump_file, " is %ld\n", (rank + 1)); + } + + /* Note the rank in the hashtable so we don't recompute it. */ + insert_operand_rank (e, (rank + 1)); + return (rank + 1); + } + + /* Globals, etc, are rank 0 */ + return 0; +} + + +/* We want integer ones to end up last no matter what, since they are + the ones we can do the most with. 
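A source-level sketch of the accumulator case discussed in get_rank above (hypothetical code, not from this file): biasing the rank of the loop-carried phi makes the in-loop additions independent of the accumulator, so iterations can be interleaved once the loop is unrolled.

```c
/* Illustrative only: both loops compute the same sum; the second shape is
   what the biased phi rank is intended to produce.  */

static long sum_before (const int *a, const int *d, const int *e, int n)
{
  long x = 0;
  for (int i = 0; i < n; i++)
    x = ((x + a[i]) + d[i]) + e[i];           /* every add waits on x */
  return x;
}

static long sum_after (const int *a, const int *d, const int *e, int n)
{
  long x = 0;
  for (int i = 0; i < n; i++)
    {
      long t = ((long) a[i] + d[i]) + e[i];   /* independent of x */
      x += t;                                 /* loop-carried add goes last */
    }
  return x;
}

int main (void)
{
  int a[] = {1, 2}, d[] = {3, 4}, e[] = {5, 6};
  return sum_before (a, d, e, 2) == sum_after (a, d, e, 2) ? 0 : 1;
}
```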
*/ +#define INTEGER_CONST_TYPE 1 << 3 +#define FLOAT_CONST_TYPE 1 << 2 +#define OTHER_CONST_TYPE 1 << 1 + +/* Classify an invariant tree into integer, float, or other, so that + we can sort them to be near other constants of the same type. */ +static inline int +constant_type (tree t) +{ + if (INTEGRAL_TYPE_P (TREE_TYPE (t))) + return INTEGER_CONST_TYPE; + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t))) + return FLOAT_CONST_TYPE; + else + return OTHER_CONST_TYPE; +} + +/* qsort comparison function to sort operand entries PA and PB by rank + so that the sorted array is ordered by rank in decreasing order. */ +static int +sort_by_operand_rank (const void *pa, const void *pb) +{ + const operand_entry_t oea = *(const operand_entry_t *)pa; + const operand_entry_t oeb = *(const operand_entry_t *)pb; + + /* It's nicer for optimize_expression if constants that are likely + to fold when added/multiplied//whatever are put next to each + other. Since all constants have rank 0, order them by type. */ + if (oeb->rank == 0 && oea->rank == 0) + { + if (constant_type (oeb->op) != constant_type (oea->op)) + return constant_type (oeb->op) - constant_type (oea->op); + else + /* To make sorting result stable, we use unique IDs to determine + order. */ + return oeb->id - oea->id; + } + + /* Lastly, make sure the versions that are the same go next to each + other. */ + if ((oeb->rank - oea->rank == 0) + && TREE_CODE (oea->op) == SSA_NAME + && TREE_CODE (oeb->op) == SSA_NAME) + { + /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse + versions of removed SSA_NAMEs, so if possible, prefer to sort + based on basic block and gimple_uid of the SSA_NAME_DEF_STMT. + See PR60418. */ + if (!SSA_NAME_IS_DEFAULT_DEF (oea->op) + && !SSA_NAME_IS_DEFAULT_DEF (oeb->op) + && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op)) + { + gimple stmta = SSA_NAME_DEF_STMT (oea->op); + gimple stmtb = SSA_NAME_DEF_STMT (oeb->op); + basic_block bba = gimple_bb (stmta); + basic_block bbb = gimple_bb (stmtb); + if (bbb != bba) + { + if (bb_rank[bbb->index] != bb_rank[bba->index]) + return bb_rank[bbb->index] - bb_rank[bba->index]; + } + else + { + bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb); + bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta); + if (da != db) + return da ? 1 : -1; + } + } + + if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op)) + return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op); + else + return oeb->id - oea->id; + } + + if (oeb->rank != oea->rank) + return oeb->rank - oea->rank; + else + return oeb->id - oea->id; +} + +/* Add an operand entry to *OPS for the tree operand OP. */ + +static void +add_to_ops_vec (vec<operand_entry_t> *ops, tree op) +{ + operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); + + oe->op = op; + oe->rank = get_rank (op); + oe->id = next_operand_entry_id++; + oe->count = 1; + ops->safe_push (oe); +} + +/* Add an operand entry to *OPS for the tree operand OP with repeat + count REPEAT. */ + +static void +add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op, + HOST_WIDE_INT repeat) +{ + operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); + + oe->op = op; + oe->rank = get_rank (op); + oe->id = next_operand_entry_id++; + oe->count = repeat; + ops->safe_push (oe); + + reassociate_stats.pows_encountered++; +} + +/* Return true if STMT is reassociable operation containing a binary + operation with tree code CODE, and is inside LOOP. 
*/ + +static bool +is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop) +{ + basic_block bb = gimple_bb (stmt); + + if (gimple_bb (stmt) == NULL) + return false; + + if (!flow_bb_inside_loop_p (loop, bb)) + return false; + + if (is_gimple_assign (stmt) + && gimple_assign_rhs_code (stmt) == code + && has_single_use (gimple_assign_lhs (stmt))) + return true; + + return false; +} + + +/* Given NAME, if NAME is defined by a unary operation OPCODE, return the + operand of the negate operation. Otherwise, return NULL. */ + +static tree +get_unary_op (tree name, enum tree_code opcode) +{ + gimple stmt = SSA_NAME_DEF_STMT (name); + + if (!is_gimple_assign (stmt)) + return NULL_TREE; + + if (gimple_assign_rhs_code (stmt) == opcode) + return gimple_assign_rhs1 (stmt); + return NULL_TREE; +} + +/* If CURR and LAST are a pair of ops that OPCODE allows us to + eliminate through equivalences, do so, remove them from OPS, and + return true. Otherwise, return false. */ + +static bool +eliminate_duplicate_pair (enum tree_code opcode, + vec<operand_entry_t> *ops, + bool *all_done, + unsigned int i, + operand_entry_t curr, + operand_entry_t last) +{ + + /* If we have two of the same op, and the opcode is & |, min, or max, + we can eliminate one of them. + If we have two of the same op, and the opcode is ^, we can + eliminate both of them. */ + + if (last && last->op == curr->op) + { + switch (opcode) + { + case MAX_EXPR: + case MIN_EXPR: + case BIT_IOR_EXPR: + case BIT_AND_EXPR: + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Equivalence: "); + print_generic_expr (dump_file, curr->op, 0); + fprintf (dump_file, " [&|minmax] "); + print_generic_expr (dump_file, last->op, 0); + fprintf (dump_file, " -> "); + print_generic_stmt (dump_file, last->op, 0); + } + + ops->ordered_remove (i); + reassociate_stats.ops_eliminated ++; + + return true; + + case BIT_XOR_EXPR: + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Equivalence: "); + print_generic_expr (dump_file, curr->op, 0); + fprintf (dump_file, " ^ "); + print_generic_expr (dump_file, last->op, 0); + fprintf (dump_file, " -> nothing\n"); + } + + reassociate_stats.ops_eliminated += 2; + + if (ops->length () == 2) + { + ops->create (0); + add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op))); + *all_done = true; + } + else + { + ops->ordered_remove (i-1); + ops->ordered_remove (i-1); + } + + return true; + + default: + break; + } + } + return false; +} + +static vec<tree> plus_negates; + +/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not + expression, look in OPS for a corresponding positive operation to cancel + it out. If we find one, remove the other from OPS, replace + OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise, + return false. */ + +static bool +eliminate_plus_minus_pair (enum tree_code opcode, + vec<operand_entry_t> *ops, + unsigned int currindex, + operand_entry_t curr) +{ + tree negateop; + tree notop; + unsigned int i; + operand_entry_t oe; + + if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) + return false; + + negateop = get_unary_op (curr->op, NEGATE_EXPR); + notop = get_unary_op (curr->op, BIT_NOT_EXPR); + if (negateop == NULL_TREE && notop == NULL_TREE) + return false; + + /* Any non-negated version will have a rank that is one less than + the current rank. So once we hit those ranks, if we don't find + one, we can stop. 
*/ + + for (i = currindex + 1; + ops->iterate (i, &oe) + && oe->rank >= curr->rank - 1 ; + i++) + { + if (oe->op == negateop) + { + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Equivalence: "); + print_generic_expr (dump_file, negateop, 0); + fprintf (dump_file, " + -"); + print_generic_expr (dump_file, oe->op, 0); + fprintf (dump_file, " -> 0\n"); + } + + ops->ordered_remove (i); + add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op))); + ops->ordered_remove (currindex); + reassociate_stats.ops_eliminated ++; + + return true; + } + else if (oe->op == notop) + { + tree op_type = TREE_TYPE (oe->op); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Equivalence: "); + print_generic_expr (dump_file, notop, 0); + fprintf (dump_file, " + ~"); + print_generic_expr (dump_file, oe->op, 0); + fprintf (dump_file, " -> -1\n"); + } + + ops->ordered_remove (i); + add_to_ops_vec (ops, build_int_cst_type (op_type, -1)); + ops->ordered_remove (currindex); + reassociate_stats.ops_eliminated ++; + + return true; + } + } + + /* CURR->OP is a negate expr in a plus expr: save it for later + inspection in repropagate_negates(). */ + if (negateop != NULL_TREE) + plus_negates.safe_push (curr->op); + + return false; +} + +/* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a + bitwise not expression, look in OPS for a corresponding operand to + cancel it out. If we find one, remove the other from OPS, replace + OPS[CURRINDEX] with 0, and return true. Otherwise, return + false. */ + +static bool +eliminate_not_pairs (enum tree_code opcode, + vec<operand_entry_t> *ops, + unsigned int currindex, + operand_entry_t curr) +{ + tree notop; + unsigned int i; + operand_entry_t oe; + + if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) + || TREE_CODE (curr->op) != SSA_NAME) + return false; + + notop = get_unary_op (curr->op, BIT_NOT_EXPR); + if (notop == NULL_TREE) + return false; + + /* Any non-not version will have a rank that is one less than + the current rank. So once we hit those ranks, if we don't find + one, we can stop. */ + + for (i = currindex + 1; + ops->iterate (i, &oe) + && oe->rank >= curr->rank - 1; + i++) + { + if (oe->op == notop) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Equivalence: "); + print_generic_expr (dump_file, notop, 0); + if (opcode == BIT_AND_EXPR) + fprintf (dump_file, " & ~"); + else if (opcode == BIT_IOR_EXPR) + fprintf (dump_file, " | ~"); + print_generic_expr (dump_file, oe->op, 0); + if (opcode == BIT_AND_EXPR) + fprintf (dump_file, " -> 0\n"); + else if (opcode == BIT_IOR_EXPR) + fprintf (dump_file, " -> -1\n"); + } + + if (opcode == BIT_AND_EXPR) + oe->op = build_zero_cst (TREE_TYPE (oe->op)); + else if (opcode == BIT_IOR_EXPR) + oe->op = build_all_ones_cst (TREE_TYPE (oe->op)); + + reassociate_stats.ops_eliminated += ops->length () - 1; + ops->truncate (0); + ops->quick_push (oe); + return true; + } + } + + return false; +} + +/* Use constant value that may be present in OPS to try to eliminate + operands. Note that this function is only really used when we've + eliminated ops for other reasons, or merged constants. Across + single statements, fold already does all of this, plus more. There + is little point in duplicating logic, so I've only included the + identities that I could ever construct testcases to trigger. 
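The identities handled by eliminate_duplicate_pair, eliminate_plus_minus_pair, eliminate_not_pairs and eliminate_using_constants, collected into a small stand-alone check (illustrative sketch, not part of this file):

```c
#include <assert.h>

int main (void)
{
  int a = 42, b = 7;

  assert ((a & a) == a);        /* duplicate pair under &, |, min, max */
  assert ((a ^ a) == 0);        /* duplicate pair under ^ removes both */
  assert ((a + -a) == 0);       /* eliminate_plus_minus_pair, negate form */
  assert ((a + ~a) == -1);      /* eliminate_plus_minus_pair, not form */
  assert ((a & ~a) == 0);       /* eliminate_not_pairs under & */
  assert ((a | ~a) == -1);      /* eliminate_not_pairs under | */
  assert ((b & 0) == 0);        /* eliminate_using_constants: & 0 wipes the list */
  assert ((b | -1) == -1);      /* | -1 absorbs everything */
  assert ((b * 1) == b);        /* * 1 is simply dropped */
  return 0;
}
```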
*/ + +static void +eliminate_using_constants (enum tree_code opcode, + vec<operand_entry_t> *ops) +{ + operand_entry_t oelast = ops->last (); + tree type = TREE_TYPE (oelast->op); + + if (oelast->rank == 0 + && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type))) + { + switch (opcode) + { + case BIT_AND_EXPR: + if (integer_zerop (oelast->op)) + { + if (ops->length () != 1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Found & 0, removing all other ops\n"); + + reassociate_stats.ops_eliminated += ops->length () - 1; + + ops->truncate (0); + ops->quick_push (oelast); + return; + } + } + else if (integer_all_onesp (oelast->op)) + { + if (ops->length () != 1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Found & -1, removing\n"); + ops->pop (); + reassociate_stats.ops_eliminated++; + } + } + break; + case BIT_IOR_EXPR: + if (integer_all_onesp (oelast->op)) + { + if (ops->length () != 1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Found | -1, removing all other ops\n"); + + reassociate_stats.ops_eliminated += ops->length () - 1; + + ops->truncate (0); + ops->quick_push (oelast); + return; + } + } + else if (integer_zerop (oelast->op)) + { + if (ops->length () != 1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Found | 0, removing\n"); + ops->pop (); + reassociate_stats.ops_eliminated++; + } + } + break; + case MULT_EXPR: + if (integer_zerop (oelast->op) + || (FLOAT_TYPE_P (type) + && !HONOR_NANS (TYPE_MODE (type)) + && !HONOR_SIGNED_ZEROS (TYPE_MODE (type)) + && real_zerop (oelast->op))) + { + if (ops->length () != 1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Found * 0, removing all other ops\n"); + + reassociate_stats.ops_eliminated += ops->length () - 1; + ops->truncate (1); + ops->quick_push (oelast); + return; + } + } + else if (integer_onep (oelast->op) + || (FLOAT_TYPE_P (type) + && !HONOR_SNANS (TYPE_MODE (type)) + && real_onep (oelast->op))) + { + if (ops->length () != 1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Found * 1, removing\n"); + ops->pop (); + reassociate_stats.ops_eliminated++; + return; + } + } + break; + case BIT_XOR_EXPR: + case PLUS_EXPR: + case MINUS_EXPR: + if (integer_zerop (oelast->op) + || (FLOAT_TYPE_P (type) + && (opcode == PLUS_EXPR || opcode == MINUS_EXPR) + && fold_real_zero_addition_p (type, oelast->op, + opcode == MINUS_EXPR))) + { + if (ops->length () != 1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Found [|^+] 0, removing\n"); + ops->pop (); + reassociate_stats.ops_eliminated++; + return; + } + } + break; + default: + break; + } + } +} + + +static void linearize_expr_tree (vec<operand_entry_t> *, gimple, + bool, bool); + +/* Structure for tracking and counting operands. */ +typedef struct oecount_s { + int cnt; + int id; + enum tree_code oecode; + tree op; +} oecount; + + +/* The heap for the oecount hashtable and the sorted list of operands. */ +static vec<oecount> cvec; + + +/* Oecount hashtable helpers. */ + +struct oecount_hasher : typed_noop_remove <void> +{ + /* Note that this hash table stores integers, not pointers. + So, observe the casting in the member functions. */ + typedef void value_type; + typedef void compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); +}; + +/* Hash function for oecount. 
*/ + +inline hashval_t +oecount_hasher::hash (const value_type *p) +{ + const oecount *c = &cvec[(size_t)p - 42]; + return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode; +} + +/* Comparison function for oecount. */ + +inline bool +oecount_hasher::equal (const value_type *p1, const compare_type *p2) +{ + const oecount *c1 = &cvec[(size_t)p1 - 42]; + const oecount *c2 = &cvec[(size_t)p2 - 42]; + return (c1->oecode == c2->oecode + && c1->op == c2->op); +} + +/* Comparison function for qsort sorting oecount elements by count. */ + +static int +oecount_cmp (const void *p1, const void *p2) +{ + const oecount *c1 = (const oecount *)p1; + const oecount *c2 = (const oecount *)p2; + if (c1->cnt != c2->cnt) + return c1->cnt - c2->cnt; + else + /* If counts are identical, use unique IDs to stabilize qsort. */ + return c1->id - c2->id; +} + +/* Return TRUE iff STMT represents a builtin call that raises OP + to some exponent. */ + +static bool +stmt_is_power_of_op (gimple stmt, tree op) +{ + tree fndecl; + + if (!is_gimple_call (stmt)) + return false; + + fndecl = gimple_call_fndecl (stmt); + + if (!fndecl + || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) + return false; + + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) + { + CASE_FLT_FN (BUILT_IN_POW): + CASE_FLT_FN (BUILT_IN_POWI): + return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0)); + + default: + return false; + } +} + +/* Given STMT which is a __builtin_pow* call, decrement its exponent + in place and return the result. Assumes that stmt_is_power_of_op + was previously called for STMT and returned TRUE. */ + +static HOST_WIDE_INT +decrement_power (gimple stmt) +{ + REAL_VALUE_TYPE c, cint; + HOST_WIDE_INT power; + tree arg1; + + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) + { + CASE_FLT_FN (BUILT_IN_POW): + arg1 = gimple_call_arg (stmt, 1); + c = TREE_REAL_CST (arg1); + power = real_to_integer (&c) - 1; + real_from_integer (&cint, VOIDmode, power, 0, 0); + gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint)); + return power; + + CASE_FLT_FN (BUILT_IN_POWI): + arg1 = gimple_call_arg (stmt, 1); + power = TREE_INT_CST_LOW (arg1) - 1; + gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power)); + return power; + + default: + gcc_unreachable (); + } +} + +/* Find the single immediate use of STMT's LHS, and replace it + with OP. Remove STMT. If STMT's LHS is the same as *DEF, + replace *DEF with OP as well. */ + +static void +propagate_op_to_single_use (tree op, gimple stmt, tree *def) +{ + tree lhs; + gimple use_stmt; + use_operand_p use; + gimple_stmt_iterator gsi; + + if (is_gimple_call (stmt)) + lhs = gimple_call_lhs (stmt); + else + lhs = gimple_assign_lhs (stmt); + + gcc_assert (has_single_use (lhs)); + single_imm_use (lhs, &use, &use_stmt); + if (lhs == *def) + *def = op; + SET_USE (use, op); + if (TREE_CODE (op) != SSA_NAME) + update_stmt (use_stmt); + gsi = gsi_for_stmt (stmt); + unlink_stmt_vdef (stmt); + gsi_remove (&gsi, true); + release_defs (stmt); +} + +/* Walks the linear chain with result *DEF searching for an operation + with operand OP and code OPCODE removing that from the chain. *DEF + is updated if there is only one operand but no operation left. 
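A small illustration of step 3a (repeated factors combined into a single __builtin_powi call), assuming a GCC-compatible compiler for the builtin; the program below is illustrative and not part of this file.

```c
#include <stdio.h>

static double spelled_out (double x)
{
  return x * x * x * x * x;          /* five occurrences of the factor x */
}

static double with_powi (double x)
{
  return __builtin_powi (x, 5);      /* the form the pass builds internally,
                                        later expanded into few multiplies */
}

int main (void)
{
  printf ("%g %g\n", spelled_out (1.5), with_powi (1.5));
  return 0;
}
```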
*/ + +static void +zero_one_operation (tree *def, enum tree_code opcode, tree op) +{ + gimple stmt = SSA_NAME_DEF_STMT (*def); + + do + { + tree name; + + if (opcode == MULT_EXPR + && stmt_is_power_of_op (stmt, op)) + { + if (decrement_power (stmt) == 1) + propagate_op_to_single_use (op, stmt, def); + return; + } + + name = gimple_assign_rhs1 (stmt); + + /* If this is the operation we look for and one of the operands + is ours simply propagate the other operand into the stmts + single use. */ + if (gimple_assign_rhs_code (stmt) == opcode + && (name == op + || gimple_assign_rhs2 (stmt) == op)) + { + if (name == op) + name = gimple_assign_rhs2 (stmt); + propagate_op_to_single_use (name, stmt, def); + return; + } + + /* We might have a multiply of two __builtin_pow* calls, and + the operand might be hiding in the rightmost one. */ + if (opcode == MULT_EXPR + && gimple_assign_rhs_code (stmt) == opcode + && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME + && has_single_use (gimple_assign_rhs2 (stmt))) + { + gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); + if (stmt_is_power_of_op (stmt2, op)) + { + if (decrement_power (stmt2) == 1) + propagate_op_to_single_use (op, stmt2, def); + return; + } + } + + /* Continue walking the chain. */ + gcc_assert (name != op + && TREE_CODE (name) == SSA_NAME); + stmt = SSA_NAME_DEF_STMT (name); + } + while (1); +} + +/* Returns true if statement S1 dominates statement S2. Like + stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */ + +static bool +reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2) +{ + basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); + + /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D) + SSA_NAME. Assume it lives at the beginning of function and + thus dominates everything. */ + if (!bb1 || s1 == s2) + return true; + + /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */ + if (!bb2) + return false; + + if (bb1 == bb2) + { + /* PHIs in the same basic block are assumed to be + executed all in parallel, if only one stmt is a PHI, + it dominates the other stmt in the same basic block. */ + if (gimple_code (s1) == GIMPLE_PHI) + return true; + + if (gimple_code (s2) == GIMPLE_PHI) + return false; + + gcc_assert (gimple_uid (s1) && gimple_uid (s2)); + + if (gimple_uid (s1) < gimple_uid (s2)) + return true; + + if (gimple_uid (s1) > gimple_uid (s2)) + return false; + + gimple_stmt_iterator gsi = gsi_for_stmt (s1); + unsigned int uid = gimple_uid (s1); + for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple s = gsi_stmt (gsi); + if (gimple_uid (s) != uid) + break; + if (s == s2) + return true; + } + + return false; + } + + return dominated_by_p (CDI_DOMINATORS, bb2, bb1); +} + +/* Insert STMT after INSERT_POINT. */ + +static void +insert_stmt_after (gimple stmt, gimple insert_point) +{ + gimple_stmt_iterator gsi; + basic_block bb; + + if (gimple_code (insert_point) == GIMPLE_PHI) + bb = gimple_bb (insert_point); + else if (!stmt_ends_bb_p (insert_point)) + { + gsi = gsi_for_stmt (insert_point); + gimple_set_uid (stmt, gimple_uid (insert_point)); + gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); + return; + } + else + /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME, + thus if it must end a basic block, it should be a call that can + throw, or some assignment that can throw. If it throws, the LHS + of it will not be initialized though, so only valid places using + the SSA_NAME should be dominated by the fallthru edge. 
*/ + bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest; + gsi = gsi_after_labels (bb); + if (gsi_end_p (gsi)) + { + gimple_stmt_iterator gsi2 = gsi_last_bb (bb); + gimple_set_uid (stmt, + gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2))); + } + else + gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi))); + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); +} + +/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for + the result. Places the statement after the definition of either + OP1 or OP2. Returns the new statement. */ + +static gimple +build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode) +{ + gimple op1def = NULL, op2def = NULL; + gimple_stmt_iterator gsi; + tree op; + gimple sum; + + /* Create the addition statement. */ + op = make_ssa_name (type, NULL); + sum = gimple_build_assign_with_ops (opcode, op, op1, op2); + + /* Find an insertion place and insert. */ + if (TREE_CODE (op1) == SSA_NAME) + op1def = SSA_NAME_DEF_STMT (op1); + if (TREE_CODE (op2) == SSA_NAME) + op2def = SSA_NAME_DEF_STMT (op2); + if ((!op1def || gimple_nop_p (op1def)) + && (!op2def || gimple_nop_p (op2def))) + { + gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); + if (gsi_end_p (gsi)) + { + gimple_stmt_iterator gsi2 + = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); + gimple_set_uid (sum, + gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2))); + } + else + gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi))); + gsi_insert_before (&gsi, sum, GSI_NEW_STMT); + } + else + { + gimple insert_point; + if ((!op1def || gimple_nop_p (op1def)) + || (op2def && !gimple_nop_p (op2def) + && reassoc_stmt_dominates_stmt_p (op1def, op2def))) + insert_point = op2def; + else + insert_point = op1def; + insert_stmt_after (sum, insert_point); + } + update_stmt (sum); + + return sum; +} + +/* Perform un-distribution of divisions and multiplications. + A * X + B * X is transformed into (A + B) * X and A / X + B / X + to (A + B) / X for real X. + + The algorithm is organized as follows. + + - First we walk the addition chain *OPS looking for summands that + are defined by a multiplication or a real division. This results + in the candidates bitmap with relevant indices into *OPS. + + - Second we build the chains of multiplications or divisions for + these candidates, counting the number of occurrences of (operand, code) + pairs in all of the candidates chains. + + - Third we sort the (operand, code) pairs by number of occurrence and + process them starting with the pair with the most uses. + + * For each such pair we walk the candidates again to build a + second candidate bitmap noting all multiplication/division chains + that have at least one occurrence of (operand, code). + + * We build an alternate addition chain only covering these + candidates with one (operand, code) operation removed from their + multiplication/division chain. + + * The first candidate gets replaced by the alternate addition chain + multiplied/divided by the operand. + + * All candidate chains get disabled for further processing and + processing of (operand, code) pairs continues. + + The alternate addition chains built are re-processed by the main + reassociation algorithm which allows optimizing a * x * y + b * y * x + to (a + b ) * x * y in one invocation of the reassociation pass. 
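A minimal before/after sketch of what undistribute_ops_list achieves at the source level (hypothetical example, not part of this file); the same idea applies to real division by a common operand.

```c
#include <stdio.h>

static double before (double a, double b, double x)
{
  return a * x + b * x;              /* two multiplies feeding one add */
}

static double after (double a, double b, double x)
{
  return (a + b) * x;                /* common factor pulled out */
}

int main (void)
{
  printf ("%g %g\n", before (2.0, 3.0, 4.0), after (2.0, 3.0, 4.0));
  return 0;
}
```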
*/ + +static bool +undistribute_ops_list (enum tree_code opcode, + vec<operand_entry_t> *ops, struct loop *loop) +{ + unsigned int length = ops->length (); + operand_entry_t oe1; + unsigned i, j; + sbitmap candidates, candidates2; + unsigned nr_candidates, nr_candidates2; + sbitmap_iterator sbi0; + vec<operand_entry_t> *subops; + hash_table <oecount_hasher> ctable; + bool changed = false; + int next_oecount_id = 0; + + if (length <= 1 + || opcode != PLUS_EXPR) + return false; + + /* Build a list of candidates to process. */ + candidates = sbitmap_alloc (length); + bitmap_clear (candidates); + nr_candidates = 0; + FOR_EACH_VEC_ELT (*ops, i, oe1) + { + enum tree_code dcode; + gimple oe1def; + + if (TREE_CODE (oe1->op) != SSA_NAME) + continue; + oe1def = SSA_NAME_DEF_STMT (oe1->op); + if (!is_gimple_assign (oe1def)) + continue; + dcode = gimple_assign_rhs_code (oe1def); + if ((dcode != MULT_EXPR + && dcode != RDIV_EXPR) + || !is_reassociable_op (oe1def, dcode, loop)) + continue; + + bitmap_set_bit (candidates, i); + nr_candidates++; + } + + if (nr_candidates < 2) + { + sbitmap_free (candidates); + return false; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "searching for un-distribute opportunities "); + print_generic_expr (dump_file, + (*ops)[bitmap_first_set_bit (candidates)]->op, 0); + fprintf (dump_file, " %d\n", nr_candidates); + } + + /* Build linearized sub-operand lists and the counting table. */ + cvec.create (0); + ctable.create (15); + /* ??? Macro arguments cannot have multi-argument template types in + them. This typedef is needed to workaround that limitation. */ + typedef vec<operand_entry_t> vec_operand_entry_t_heap; + subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ()); + EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) + { + gimple oedef; + enum tree_code oecode; + unsigned j; + + oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op); + oecode = gimple_assign_rhs_code (oedef); + linearize_expr_tree (&subops[i], oedef, + associative_tree_code (oecode), false); + + FOR_EACH_VEC_ELT (subops[i], j, oe1) + { + oecount c; + void **slot; + size_t idx; + c.oecode = oecode; + c.cnt = 1; + c.id = next_oecount_id++; + c.op = oe1->op; + cvec.safe_push (c); + idx = cvec.length () + 41; + slot = ctable.find_slot ((void *)idx, INSERT); + if (!*slot) + { + *slot = (void *)idx; + } + else + { + cvec.pop (); + cvec[(size_t)*slot - 42].cnt++; + } + } + } + ctable.dispose (); + + /* Sort the counting table. */ + cvec.qsort (oecount_cmp); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + oecount *c; + fprintf (dump_file, "Candidates:\n"); + FOR_EACH_VEC_ELT (cvec, j, c) + { + fprintf (dump_file, " %u %s: ", c->cnt, + c->oecode == MULT_EXPR + ? "*" : c->oecode == RDIV_EXPR ? "/" : "?"); + print_generic_expr (dump_file, c->op, 0); + fprintf (dump_file, "\n"); + } + } + + /* Process the (operand, code) pairs in order of most occurrence. */ + candidates2 = sbitmap_alloc (length); + while (!cvec.is_empty ()) + { + oecount *c = &cvec.last (); + if (c->cnt < 2) + break; + + /* Now collect the operands in the outer chain that contain + the common operand in their inner chain. */ + bitmap_clear (candidates2); + nr_candidates2 = 0; + EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) + { + gimple oedef; + enum tree_code oecode; + unsigned j; + tree op = (*ops)[i]->op; + + /* If we undistributed in this chain already this may be + a constant. 
*/ + if (TREE_CODE (op) != SSA_NAME) + continue; + + oedef = SSA_NAME_DEF_STMT (op); + oecode = gimple_assign_rhs_code (oedef); + if (oecode != c->oecode) + continue; + + FOR_EACH_VEC_ELT (subops[i], j, oe1) + { + if (oe1->op == c->op) + { + bitmap_set_bit (candidates2, i); + ++nr_candidates2; + break; + } + } + } + + if (nr_candidates2 >= 2) + { + operand_entry_t oe1, oe2; + gimple prod; + int first = bitmap_first_set_bit (candidates2); + + /* Build the new addition chain. */ + oe1 = (*ops)[first]; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Building ("); + print_generic_expr (dump_file, oe1->op, 0); + } + zero_one_operation (&oe1->op, c->oecode, c->op); + EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0) + { + gimple sum; + oe2 = (*ops)[i]; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " + "); + print_generic_expr (dump_file, oe2->op, 0); + } + zero_one_operation (&oe2->op, c->oecode, c->op); + sum = build_and_add_sum (TREE_TYPE (oe1->op), + oe1->op, oe2->op, opcode); + oe2->op = build_zero_cst (TREE_TYPE (oe2->op)); + oe2->rank = 0; + oe1->op = gimple_get_lhs (sum); + } + + /* Apply the multiplication/division. */ + prod = build_and_add_sum (TREE_TYPE (oe1->op), + oe1->op, c->op, c->oecode); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/"); + print_generic_expr (dump_file, c->op, 0); + fprintf (dump_file, "\n"); + } + + /* Record it in the addition chain and disable further + undistribution with this op. */ + oe1->op = gimple_assign_lhs (prod); + oe1->rank = get_rank (oe1->op); + subops[first].release (); + + changed = true; + } + + cvec.pop (); + } + + for (i = 0; i < ops->length (); ++i) + subops[i].release (); + free (subops); + cvec.release (); + sbitmap_free (candidates); + sbitmap_free (candidates2); + + return changed; +} + +/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison + expression, examine the other OPS to see if any of them are comparisons + of the same values, which we may be able to combine or eliminate. + For example, we can rewrite (a < b) | (a == b) as (a <= b). */ + +static bool +eliminate_redundant_comparison (enum tree_code opcode, + vec<operand_entry_t> *ops, + unsigned int currindex, + operand_entry_t curr) +{ + tree op1, op2; + enum tree_code lcode, rcode; + gimple def1, def2; + int i; + operand_entry_t oe; + + if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) + return false; + + /* Check that CURR is a comparison. */ + if (TREE_CODE (curr->op) != SSA_NAME) + return false; + def1 = SSA_NAME_DEF_STMT (curr->op); + if (!is_gimple_assign (def1)) + return false; + lcode = gimple_assign_rhs_code (def1); + if (TREE_CODE_CLASS (lcode) != tcc_comparison) + return false; + op1 = gimple_assign_rhs1 (def1); + op2 = gimple_assign_rhs2 (def1); + + /* Now look for a similar comparison in the remaining OPS. */ + for (i = currindex + 1; ops->iterate (i, &oe); i++) + { + tree t; + + if (TREE_CODE (oe->op) != SSA_NAME) + continue; + def2 = SSA_NAME_DEF_STMT (oe->op); + if (!is_gimple_assign (def2)) + continue; + rcode = gimple_assign_rhs_code (def2); + if (TREE_CODE_CLASS (rcode) != tcc_comparison) + continue; + + /* If we got here, we have a match. See if we can combine the + two comparisons. 
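A brute-force check of the comparison combining performed here, e.g. (a < b) | (a == b) rewritten as (a <= b) (illustrative program, not part of this file):

```c
#include <assert.h>

int main (void)
{
  for (int a = -2; a <= 2; a++)
    for (int b = -2; b <= 2; b++)
      {
        assert (((a < b) | (a == b)) == (a <= b));   /* ior case */
        assert (((a < b) & (a == b)) == 0);          /* and case folds to false */
      }
  return 0;
}
```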
*/ + if (opcode == BIT_IOR_EXPR) + t = maybe_fold_or_comparisons (lcode, op1, op2, + rcode, gimple_assign_rhs1 (def2), + gimple_assign_rhs2 (def2)); + else + t = maybe_fold_and_comparisons (lcode, op1, op2, + rcode, gimple_assign_rhs1 (def2), + gimple_assign_rhs2 (def2)); + if (!t) + continue; + + /* maybe_fold_and_comparisons and maybe_fold_or_comparisons + always give us a boolean_type_node value back. If the original + BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type, + we need to convert. */ + if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t))) + t = fold_convert (TREE_TYPE (curr->op), t); + + if (TREE_CODE (t) != INTEGER_CST + && !operand_equal_p (t, curr->op, 0)) + { + enum tree_code subcode; + tree newop1, newop2; + if (!COMPARISON_CLASS_P (t)) + continue; + extract_ops_from_tree (t, &subcode, &newop1, &newop2); + STRIP_USELESS_TYPE_CONVERSION (newop1); + STRIP_USELESS_TYPE_CONVERSION (newop2); + if (!is_gimple_val (newop1) || !is_gimple_val (newop2)) + continue; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Equivalence: "); + print_generic_expr (dump_file, curr->op, 0); + fprintf (dump_file, " %s ", op_symbol_code (opcode)); + print_generic_expr (dump_file, oe->op, 0); + fprintf (dump_file, " -> "); + print_generic_expr (dump_file, t, 0); + fprintf (dump_file, "\n"); + } + + /* Now we can delete oe, as it has been subsumed by the new combined + expression t. */ + ops->ordered_remove (i); + reassociate_stats.ops_eliminated ++; + + /* If t is the same as curr->op, we're done. Otherwise we must + replace curr->op with t. Special case is if we got a constant + back, in which case we add it to the end instead of in place of + the current entry. */ + if (TREE_CODE (t) == INTEGER_CST) + { + ops->ordered_remove (currindex); + add_to_ops_vec (ops, t); + } + else if (!operand_equal_p (t, curr->op, 0)) + { + gimple sum; + enum tree_code subcode; + tree newop1; + tree newop2; + gcc_assert (COMPARISON_CLASS_P (t)); + extract_ops_from_tree (t, &subcode, &newop1, &newop2); + STRIP_USELESS_TYPE_CONVERSION (newop1); + STRIP_USELESS_TYPE_CONVERSION (newop2); + gcc_checking_assert (is_gimple_val (newop1) + && is_gimple_val (newop2)); + sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode); + curr->op = gimple_get_lhs (sum); + } + return true; + } + + return false; +} + +/* Perform various identities and other optimizations on the list of + operand entries, stored in OPS. The tree code for the binary + operation between all the operands is OPCODE. */ + +static void +optimize_ops_list (enum tree_code opcode, + vec<operand_entry_t> *ops) +{ + unsigned int length = ops->length (); + unsigned int i; + operand_entry_t oe; + operand_entry_t oelast = NULL; + bool iterate = false; + + if (length == 1) + return; + + oelast = ops->last (); + + /* If the last two are constants, pop the constants off, merge them + and try the next two. 
*/ + if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) + { + operand_entry_t oelm1 = (*ops)[length - 2]; + + if (oelm1->rank == 0 + && is_gimple_min_invariant (oelm1->op) + && useless_type_conversion_p (TREE_TYPE (oelm1->op), + TREE_TYPE (oelast->op))) + { + tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), + oelm1->op, oelast->op); + + if (folded && is_gimple_min_invariant (folded)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Merging constants\n"); + + ops->pop (); + ops->pop (); + + add_to_ops_vec (ops, folded); + reassociate_stats.constants_eliminated++; + + optimize_ops_list (opcode, ops); + return; + } + } + } + + eliminate_using_constants (opcode, ops); + oelast = NULL; + + for (i = 0; ops->iterate (i, &oe);) + { + bool done = false; + + if (eliminate_not_pairs (opcode, ops, i, oe)) + return; + if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) + || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) + || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) + { + if (done) + return; + iterate = true; + oelast = NULL; + continue; + } + oelast = oe; + i++; + } + + length = ops->length (); + oelast = ops->last (); + + if (iterate) + optimize_ops_list (opcode, ops); +} + +/* The following functions are subroutines to optimize_range_tests and allow + it to try to change a logical combination of comparisons into a range + test. + + For example, both + X == 2 || X == 5 || X == 3 || X == 4 + and + X >= 2 && X <= 5 + are converted to + (unsigned) (X - 2) <= 3 + + For more information see comments above fold_test_range in fold-const.c, + this implementation is for GIMPLE. */ + +struct range_entry +{ + tree exp; + tree low; + tree high; + bool in_p; + bool strict_overflow_p; + unsigned int idx, next; +}; + +/* This is similar to make_range in fold-const.c, but on top of + GIMPLE instead of trees. If EXP is non-NULL, it should be + an SSA_NAME and STMT argument is ignored, otherwise STMT + argument should be a GIMPLE_COND. */ + +static void +init_range_entry (struct range_entry *r, tree exp, gimple stmt) +{ + int in_p; + tree low, high; + bool is_bool, strict_overflow_p; + + r->exp = NULL_TREE; + r->in_p = false; + r->strict_overflow_p = false; + r->low = NULL_TREE; + r->high = NULL_TREE; + if (exp != NULL_TREE + && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) + return; + + /* Start with simply saying "EXP != 0" and then look at the code of EXP + and see if we can refine the range. Some of the cases below may not + happen, but it doesn't seem worth worrying about this. We "continue" + the outer loop when we've changed something; otherwise we "break" + the switch, which will "break" the while. */ + low = exp ? 
build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; + high = low; + in_p = 0; + strict_overflow_p = false; + is_bool = false; + if (exp == NULL_TREE) + is_bool = true; + else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) + { + if (TYPE_UNSIGNED (TREE_TYPE (exp))) + is_bool = true; + else + return; + } + else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) + is_bool = true; + + while (1) + { + enum tree_code code; + tree arg0, arg1, exp_type; + tree nexp; + location_t loc; + + if (exp != NULL_TREE) + { + if (TREE_CODE (exp) != SSA_NAME + || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp)) + break; + + stmt = SSA_NAME_DEF_STMT (exp); + if (!is_gimple_assign (stmt)) + break; + + code = gimple_assign_rhs_code (stmt); + arg0 = gimple_assign_rhs1 (stmt); + arg1 = gimple_assign_rhs2 (stmt); + exp_type = TREE_TYPE (exp); + } + else + { + code = gimple_cond_code (stmt); + arg0 = gimple_cond_lhs (stmt); + arg1 = gimple_cond_rhs (stmt); + exp_type = boolean_type_node; + } + + if (TREE_CODE (arg0) != SSA_NAME) + break; + loc = gimple_location (stmt); + switch (code) + { + case BIT_NOT_EXPR: + if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE + /* Ensure the range is either +[-,0], +[0,0], + -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or + -[1,1]. If it is e.g. +[-,-] or -[-,-] + or similar expression of unconditional true or + false, it should not be negated. */ + && ((high && integer_zerop (high)) + || (low && integer_onep (low)))) + { + in_p = !in_p; + exp = arg0; + continue; + } + break; + case SSA_NAME: + exp = arg0; + continue; + CASE_CONVERT: + if (is_bool) + goto do_default; + if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) + { + if (TYPE_UNSIGNED (TREE_TYPE (arg0))) + is_bool = true; + else + return; + } + else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) + is_bool = true; + goto do_default; + case EQ_EXPR: + case NE_EXPR: + case LT_EXPR: + case LE_EXPR: + case GE_EXPR: + case GT_EXPR: + is_bool = true; + /* FALLTHRU */ + default: + if (!is_bool) + return; + do_default: + nexp = make_range_step (loc, code, arg0, arg1, exp_type, + &low, &high, &in_p, + &strict_overflow_p); + if (nexp != NULL_TREE) + { + exp = nexp; + gcc_assert (TREE_CODE (exp) == SSA_NAME); + continue; + } + break; + } + break; + } + if (is_bool) + { + r->exp = exp; + r->in_p = in_p; + r->low = low; + r->high = high; + r->strict_overflow_p = strict_overflow_p; + } +} + +/* Comparison function for qsort. Sort entries + without SSA_NAME exp first, then with SSA_NAMEs sorted + by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs + by increasing ->low and if ->low is the same, by increasing + ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE + maximum. */ + +static int +range_entry_cmp (const void *a, const void *b) +{ + const struct range_entry *p = (const struct range_entry *) a; + const struct range_entry *q = (const struct range_entry *) b; + + if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) + { + if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) + { + /* Group range_entries for the same SSA_NAME together. */ + if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) + return -1; + else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) + return 1; + /* If ->low is different, NULL low goes first, then by + ascending low. 
*/ + if (p->low != NULL_TREE) + { + if (q->low != NULL_TREE) + { + tree tem = fold_binary (LT_EXPR, boolean_type_node, + p->low, q->low); + if (tem && integer_onep (tem)) + return -1; + tem = fold_binary (GT_EXPR, boolean_type_node, + p->low, q->low); + if (tem && integer_onep (tem)) + return 1; + } + else + return 1; + } + else if (q->low != NULL_TREE) + return -1; + /* If ->high is different, NULL high goes last, before that by + ascending high. */ + if (p->high != NULL_TREE) + { + if (q->high != NULL_TREE) + { + tree tem = fold_binary (LT_EXPR, boolean_type_node, + p->high, q->high); + if (tem && integer_onep (tem)) + return -1; + tem = fold_binary (GT_EXPR, boolean_type_node, + p->high, q->high); + if (tem && integer_onep (tem)) + return 1; + } + else + return -1; + } + else if (p->high != NULL_TREE) + return 1; + /* If both ranges are the same, sort below by ascending idx. */ + } + else + return 1; + } + else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) + return -1; + + if (p->idx < q->idx) + return -1; + else + { + gcc_checking_assert (p->idx > q->idx); + return 1; + } +} + +/* Helper routine of optimize_range_test. + [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for + RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, + OPCODE and OPS are arguments of optimize_range_tests. Return + true if the range merge has been successful. + If OPCODE is ERROR_MARK, this is called from within + maybe_optimize_range_tests and is performing inter-bb range optimization. + In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in + oe->rank. */ + +static bool +update_range_test (struct range_entry *range, struct range_entry *otherrange, + unsigned int count, enum tree_code opcode, + vec<operand_entry_t> *ops, tree exp, bool in_p, + tree low, tree high, bool strict_overflow_p) +{ + operand_entry_t oe = (*ops)[range->idx]; + tree op = oe->op; + gimple stmt = op ? SSA_NAME_DEF_STMT (op) : + last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); + location_t loc = gimple_location (stmt); + tree optype = op ? TREE_TYPE (op) : boolean_type_node; + tree tem = build_range_check (loc, optype, exp, in_p, low, high); + enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; + gimple_stmt_iterator gsi; + + if (tem == NULL_TREE) + return false; + + if (strict_overflow_p && issue_strict_overflow_warning (wc)) + warning_at (loc, OPT_Wstrict_overflow, + "assuming signed overflow does not occur " + "when simplifying range test"); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + struct range_entry *r; + fprintf (dump_file, "Optimizing range tests "); + print_generic_expr (dump_file, range->exp, 0); + fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); + print_generic_expr (dump_file, range->low, 0); + fprintf (dump_file, ", "); + print_generic_expr (dump_file, range->high, 0); + fprintf (dump_file, "]"); + for (r = otherrange; r < otherrange + count; r++) + { + fprintf (dump_file, " and %c[", r->in_p ? '+' : '-'); + print_generic_expr (dump_file, r->low, 0); + fprintf (dump_file, ", "); + print_generic_expr (dump_file, r->high, 0); + fprintf (dump_file, "]"); + } + fprintf (dump_file, "\n into "); + print_generic_expr (dump_file, tem, 0); + fprintf (dump_file, "\n"); + } + + if (opcode == BIT_IOR_EXPR + || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) + tem = invert_truthvalue_loc (loc, tem); + + tem = fold_convert_loc (loc, optype, tem); + gsi = gsi_for_stmt (stmt); + /* In rare cases range->exp can be equal to lhs of stmt. 
+ In that case we have to insert after the stmt rather then before + it. */ + if (op == range->exp) + tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false, + GSI_CONTINUE_LINKING); + else + { + tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, + GSI_SAME_STMT); + gsi_prev (&gsi); + } + for (; !gsi_end_p (gsi); gsi_prev (&gsi)) + if (gimple_uid (gsi_stmt (gsi))) + break; + else + gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt)); + + oe->op = tem; + range->exp = exp; + range->low = low; + range->high = high; + range->in_p = in_p; + range->strict_overflow_p = false; + + for (range = otherrange; range < otherrange + count; range++) + { + oe = (*ops)[range->idx]; + /* Now change all the other range test immediate uses, so that + those tests will be optimized away. */ + if (opcode == ERROR_MARK) + { + if (oe->op) + oe->op = build_int_cst (TREE_TYPE (oe->op), + oe->rank == BIT_IOR_EXPR ? 0 : 1); + else + oe->op = (oe->rank == BIT_IOR_EXPR + ? boolean_false_node : boolean_true_node); + } + else + oe->op = error_mark_node; + range->exp = NULL_TREE; + } + return true; +} + +/* Optimize X == CST1 || X == CST2 + if popcount (CST1 ^ CST2) == 1 into + (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). + Similarly for ranges. E.g. + X != 2 && X != 3 && X != 10 && X != 11 + will be transformed by the previous optimization into + !((X - 2U) <= 1U || (X - 10U) <= 1U) + and this loop can transform that into + !(((X & ~8) - 2U) <= 1U). */ + +static bool +optimize_range_tests_xor (enum tree_code opcode, tree type, + tree lowi, tree lowj, tree highi, tree highj, + vec<operand_entry_t> *ops, + struct range_entry *rangei, + struct range_entry *rangej) +{ + tree lowxor, highxor, tem, exp; + /* Check highi ^ lowi == highj ^ lowj and + popcount (highi ^ lowi) == 1. */ + lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); + if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) + return false; + if (tree_log2 (lowxor) < 0) + return false; + highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); + if (!tree_int_cst_equal (lowxor, highxor)) + return false; + + tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); + exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem); + lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); + highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); + if (update_range_test (rangei, rangej, 1, opcode, ops, exp, + rangei->in_p, lowj, highj, + rangei->strict_overflow_p + || rangej->strict_overflow_p)) + return true; + return false; +} + +/* Optimize X == CST1 || X == CST2 + if popcount (CST2 - CST1) == 1 into + ((X - CST1) & ~(CST2 - CST1)) == 0. + Similarly for ranges. E.g. + X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46 + || X == 75 || X == 45 + will be transformed by the previous optimization into + (X - 43U) <= 3U || (X - 75U) <= 3U + and this loop can transform that into + ((X - 43U) & ~(75U - 43U)) <= 3U. */ +static bool +optimize_range_tests_diff (enum tree_code opcode, tree type, + tree lowi, tree lowj, tree highi, tree highj, + vec<operand_entry_t> *ops, + struct range_entry *rangei, + struct range_entry *rangej) +{ + tree tem1, tem2, mask; + /* Check highi - lowi == highj - lowj. */ + tem1 = fold_binary (MINUS_EXPR, type, highi, lowi); + if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) + return false; + tem2 = fold_binary (MINUS_EXPR, type, highj, lowj); + if (!tree_int_cst_equal (tem1, tem2)) + return false; + /* Check popcount (lowj - lowi) == 1. 
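+     In the example above lowj - lowi == 75U - 43U == 32, which has
+     exactly one bit set.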
*/ + tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi); + if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) + return false; + if (tree_log2 (tem1) < 0) + return false; + + mask = fold_build1 (BIT_NOT_EXPR, type, tem1); + tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi); + tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask); + lowj = build_int_cst (type, 0); + if (update_range_test (rangei, rangej, 1, opcode, ops, tem1, + rangei->in_p, lowj, tem2, + rangei->strict_overflow_p + || rangej->strict_overflow_p)) + return true; + return false; +} + +/* It does some common checks for function optimize_range_tests_xor and + optimize_range_tests_diff. + If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor. + Else it calls optimize_range_tests_diff. */ + +static bool +optimize_range_tests_1 (enum tree_code opcode, int first, int length, + bool optimize_xor, vec<operand_entry_t> *ops, + struct range_entry *ranges) +{ + int i, j; + bool any_changes = false; + for (i = first; i < length; i++) + { + tree lowi, highi, lowj, highj, type, tem; + + if (ranges[i].exp == NULL_TREE || ranges[i].in_p) + continue; + type = TREE_TYPE (ranges[i].exp); + if (!INTEGRAL_TYPE_P (type)) + continue; + lowi = ranges[i].low; + if (lowi == NULL_TREE) + lowi = TYPE_MIN_VALUE (type); + highi = ranges[i].high; + if (highi == NULL_TREE) + continue; + for (j = i + 1; j < length && j < i + 64; j++) + { + bool changes; + if (ranges[i].exp != ranges[j].exp || ranges[j].in_p) + continue; + lowj = ranges[j].low; + if (lowj == NULL_TREE) + continue; + highj = ranges[j].high; + if (highj == NULL_TREE) + highj = TYPE_MAX_VALUE (type); + /* Check lowj > highi. */ + tem = fold_binary (GT_EXPR, boolean_type_node, + lowj, highi); + if (tem == NULL_TREE || !integer_onep (tem)) + continue; + if (optimize_xor) + changes = optimize_range_tests_xor (opcode, type, lowi, lowj, + highi, highj, ops, + ranges + i, ranges + j); + else + changes = optimize_range_tests_diff (opcode, type, lowi, lowj, + highi, highj, ops, + ranges + i, ranges + j); + if (changes) + { + any_changes = true; + break; + } + } + } + return any_changes; +} + +/* Optimize range tests, similarly how fold_range_test optimizes + it on trees. The tree code for the binary + operation between all the operands is OPCODE. + If OPCODE is ERROR_MARK, optimize_range_tests is called from within + maybe_optimize_range_tests for inter-bb range optimization. + In that case if oe->op is NULL, oe->id is bb->index whose + GIMPLE_COND is && or ||ed into the test, and oe->rank says + the actual opcode. */ + +static bool +optimize_range_tests (enum tree_code opcode, + vec<operand_entry_t> *ops) +{ + unsigned int length = ops->length (), i, j, first; + operand_entry_t oe; + struct range_entry *ranges; + bool any_changes = false; + + if (length == 1) + return false; + + ranges = XNEWVEC (struct range_entry, length); + for (i = 0; i < length; i++) + { + oe = (*ops)[i]; + ranges[i].idx = i; + init_range_entry (ranges + i, oe->op, + oe->op ? NULL : + last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id))); + /* For | invert it now, we will invert it again before emitting + the optimized expression. */ + if (opcode == BIT_IOR_EXPR + || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) + ranges[i].in_p = !ranges[i].in_p; + } + + qsort (ranges, length, sizeof (*ranges), range_entry_cmp); + for (i = 0; i < length; i++) + if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) + break; + + /* Try to merge ranges. 
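+     E.g. the tests X == 1 and X == 2 yield adjacent singleton ranges
+     that merge into one range covering [1, 2], so a single range test
+     replaces the pair.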
*/ + for (first = i; i < length; i++) + { + tree low = ranges[i].low; + tree high = ranges[i].high; + int in_p = ranges[i].in_p; + bool strict_overflow_p = ranges[i].strict_overflow_p; + int update_fail_count = 0; + + for (j = i + 1; j < length; j++) + { + if (ranges[i].exp != ranges[j].exp) + break; + if (!merge_ranges (&in_p, &low, &high, in_p, low, high, + ranges[j].in_p, ranges[j].low, ranges[j].high)) + break; + strict_overflow_p |= ranges[j].strict_overflow_p; + } + + if (j == i + 1) + continue; + + if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode, + ops, ranges[i].exp, in_p, low, high, + strict_overflow_p)) + { + i = j - 1; + any_changes = true; + } + /* Avoid quadratic complexity if all merge_ranges calls would succeed, + while update_range_test would fail. */ + else if (update_fail_count == 64) + i = j - 1; + else + ++update_fail_count; + } + + any_changes |= optimize_range_tests_1 (opcode, first, length, true, + ops, ranges); + + if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2) + any_changes |= optimize_range_tests_1 (opcode, first, length, false, + ops, ranges); + + if (any_changes && opcode != ERROR_MARK) + { + j = 0; + FOR_EACH_VEC_ELT (*ops, i, oe) + { + if (oe->op == error_mark_node) + continue; + else if (i != j) + (*ops)[j] = oe; + j++; + } + ops->truncate (j); + } + + XDELETEVEC (ranges); + return any_changes; +} + +/* Return true if STMT is a cast like: + <bb N>: + ... + _123 = (int) _234; + + <bb M>: + # _345 = PHI <_123(N), 1(...), 1(...)> + where _234 has bool type, _123 has single use and + bb N has a single successor M. This is commonly used in + the last block of a range test. */ + +static bool +final_range_test_p (gimple stmt) +{ + basic_block bb, rhs_bb; + edge e; + tree lhs, rhs; + use_operand_p use_p; + gimple use_stmt; + + if (!gimple_assign_cast_p (stmt)) + return false; + bb = gimple_bb (stmt); + if (!single_succ_p (bb)) + return false; + e = single_succ_edge (bb); + if (e->flags & EDGE_COMPLEX) + return false; + + lhs = gimple_assign_lhs (stmt); + rhs = gimple_assign_rhs1 (stmt); + if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) + || TREE_CODE (rhs) != SSA_NAME + || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE) + return false; + + /* Test whether lhs is consumed only by a PHI in the only successor bb. */ + if (!single_imm_use (lhs, &use_p, &use_stmt)) + return false; + + if (gimple_code (use_stmt) != GIMPLE_PHI + || gimple_bb (use_stmt) != e->dest) + return false; + + /* And that the rhs is defined in the same loop. */ + rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)); + if (rhs_bb == NULL + || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) + return false; + + return true; +} + +/* Return true if BB is suitable basic block for inter-bb range test + optimization. If BACKWARD is true, BB should be the only predecessor + of TEST_BB, and *OTHER_BB is either NULL and filled by the routine, + or compared with to find a common basic block to which all conditions + branch to if true resp. false. If BACKWARD is false, TEST_BB should + be the only predecessor of BB. */ + +static bool +suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, + bool backward) +{ + edge_iterator ei, ei2; + edge e, e2; + gimple stmt; + gimple_stmt_iterator gsi; + bool other_edge_seen = false; + bool is_cond; + + if (test_bb == bb) + return false; + /* Check last stmt first. 
*/ + stmt = last_stmt (bb); + if (stmt == NULL + || (gimple_code (stmt) != GIMPLE_COND + && (backward || !final_range_test_p (stmt))) + || gimple_visited_p (stmt) + || stmt_could_throw_p (stmt) + || *other_bb == bb) + return false; + is_cond = gimple_code (stmt) == GIMPLE_COND; + if (is_cond) + { + /* If last stmt is GIMPLE_COND, verify that one of the succ edges + goes to the next bb (if BACKWARD, it is TEST_BB), and the other + to *OTHER_BB (if not set yet, try to find it out). */ + if (EDGE_COUNT (bb->succs) != 2) + return false; + FOR_EACH_EDGE (e, ei, bb->succs) + { + if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) + return false; + if (e->dest == test_bb) + { + if (backward) + continue; + else + return false; + } + if (e->dest == bb) + return false; + if (*other_bb == NULL) + { + FOR_EACH_EDGE (e2, ei2, test_bb->succs) + if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) + return false; + else if (e->dest == e2->dest) + *other_bb = e->dest; + if (*other_bb == NULL) + return false; + } + if (e->dest == *other_bb) + other_edge_seen = true; + else if (backward) + return false; + } + if (*other_bb == NULL || !other_edge_seen) + return false; + } + else if (single_succ (bb) != *other_bb) + return false; + + /* Now check all PHIs of *OTHER_BB. */ + e = find_edge (bb, *other_bb); + e2 = find_edge (test_bb, *other_bb); + for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple phi = gsi_stmt (gsi); + /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments + corresponding to BB and TEST_BB predecessor must be the same. */ + if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx), + gimple_phi_arg_def (phi, e2->dest_idx), 0)) + { + /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, + one of the PHIs should have the lhs of the last stmt in + that block as PHI arg and that PHI should have 0 or 1 + corresponding to it in all other range test basic blocks + considered. */ + if (!is_cond) + { + if (gimple_phi_arg_def (phi, e->dest_idx) + == gimple_assign_lhs (stmt) + && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx)) + || integer_onep (gimple_phi_arg_def (phi, + e2->dest_idx)))) + continue; + } + else + { + gimple test_last = last_stmt (test_bb); + if (gimple_code (test_last) != GIMPLE_COND + && gimple_phi_arg_def (phi, e2->dest_idx) + == gimple_assign_lhs (test_last) + && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx)) + || integer_onep (gimple_phi_arg_def (phi, e->dest_idx)))) + continue; + } + + return false; + } + } + return true; +} + +/* Return true if BB doesn't have side-effects that would disallow + range test optimization, all SSA_NAMEs set in the bb are consumed + in the bb and there are no PHIs. 
*/ + +static bool +no_side_effect_bb (basic_block bb) +{ + gimple_stmt_iterator gsi; + gimple last; + + if (!gimple_seq_empty_p (phi_nodes (bb))) + return false; + last = last_stmt (bb); + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + tree lhs; + imm_use_iterator imm_iter; + use_operand_p use_p; + + if (is_gimple_debug (stmt)) + continue; + if (gimple_has_side_effects (stmt)) + return false; + if (stmt == last) + return true; + if (!is_gimple_assign (stmt)) + return false; + lhs = gimple_assign_lhs (stmt); + if (TREE_CODE (lhs) != SSA_NAME) + return false; + if (gimple_assign_rhs_could_trap_p (stmt)) + return false; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) + { + gimple use_stmt = USE_STMT (use_p); + if (is_gimple_debug (use_stmt)) + continue; + if (gimple_bb (use_stmt) != bb) + return false; + } + } + return false; +} + +/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, + return true and fill in *OPS recursively. */ + +static bool +get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops, + struct loop *loop) +{ + gimple stmt = SSA_NAME_DEF_STMT (var); + tree rhs[2]; + int i; + + if (!is_reassociable_op (stmt, code, loop)) + return false; + + rhs[0] = gimple_assign_rhs1 (stmt); + rhs[1] = gimple_assign_rhs2 (stmt); + gimple_set_visited (stmt, true); + for (i = 0; i < 2; i++) + if (TREE_CODE (rhs[i]) == SSA_NAME + && !get_ops (rhs[i], code, ops, loop) + && has_single_use (rhs[i])) + { + operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); + + oe->op = rhs[i]; + oe->rank = code; + oe->id = 0; + oe->count = 1; + ops->safe_push (oe); + } + return true; +} + +/* Find the ops that were added by get_ops starting from VAR, see if + they were changed during update_range_test and if yes, create new + stmts. */ + +static tree +update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops, + unsigned int *pidx, struct loop *loop) +{ + gimple stmt = SSA_NAME_DEF_STMT (var); + tree rhs[4]; + int i; + + if (!is_reassociable_op (stmt, code, loop)) + return NULL; + + rhs[0] = gimple_assign_rhs1 (stmt); + rhs[1] = gimple_assign_rhs2 (stmt); + rhs[2] = rhs[0]; + rhs[3] = rhs[1]; + for (i = 0; i < 2; i++) + if (TREE_CODE (rhs[i]) == SSA_NAME) + { + rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop); + if (rhs[2 + i] == NULL_TREE) + { + if (has_single_use (rhs[i])) + rhs[2 + i] = ops[(*pidx)++]->op; + else + rhs[2 + i] = rhs[i]; + } + } + if ((rhs[2] != rhs[0] || rhs[3] != rhs[1]) + && (rhs[2] != rhs[1] || rhs[3] != rhs[0])) + { + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + var = make_ssa_name (TREE_TYPE (var), NULL); + gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), + var, rhs[2], rhs[3]); + gimple_set_uid (g, gimple_uid (stmt)); + gimple_set_visited (g, true); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + } + return var; +} + +/* Structure to track the initial value passed to get_ops and + the range in the ops vector for each basic block. */ + +struct inter_bb_range_test_entry +{ + tree op; + unsigned int first_idx, last_idx; +}; + +/* Inter-bb range test optimization. 
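+
+   A chain such as
+     if (x == 1 || x == 2 || x == 3)
+       foo ();
+   typically reaches this pass as a series of GIMPLE_CONDs in separate
+   basic blocks that all branch to a common destination.  This routine
+   collects those blocks and hands the individual tests to
+   optimize_range_tests with opcode ERROR_MARK.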
*/ + +static void +maybe_optimize_range_tests (gimple stmt) +{ + basic_block first_bb = gimple_bb (stmt); + basic_block last_bb = first_bb; + basic_block other_bb = NULL; + basic_block bb; + edge_iterator ei; + edge e; + auto_vec<operand_entry_t> ops; + auto_vec<inter_bb_range_test_entry> bbinfo; + bool any_changes = false; + + /* Consider only basic blocks that end with GIMPLE_COND or + a cast statement satisfying final_range_test_p. All + but the last bb in the first_bb .. last_bb range + should end with GIMPLE_COND. */ + if (gimple_code (stmt) == GIMPLE_COND) + { + if (EDGE_COUNT (first_bb->succs) != 2) + return; + } + else if (final_range_test_p (stmt)) + other_bb = single_succ (first_bb); + else + return; + + if (stmt_could_throw_p (stmt)) + return; + + /* As relative ordering of post-dominator sons isn't fixed, + maybe_optimize_range_tests can be called first on any + bb in the range we want to optimize. So, start searching + backwards, if first_bb can be set to a predecessor. */ + while (single_pred_p (first_bb)) + { + basic_block pred_bb = single_pred (first_bb); + if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true)) + break; + if (!no_side_effect_bb (first_bb)) + break; + first_bb = pred_bb; + } + /* If first_bb is last_bb, other_bb hasn't been computed yet. + Before starting forward search in last_bb successors, find + out the other_bb. */ + if (first_bb == last_bb) + { + other_bb = NULL; + /* As non-GIMPLE_COND last stmt always terminates the range, + if forward search didn't discover anything, just give up. */ + if (gimple_code (stmt) != GIMPLE_COND) + return; + /* Look at both successors. Either it ends with a GIMPLE_COND + and satisfies suitable_cond_bb, or ends with a cast and + other_bb is that cast's successor. */ + FOR_EACH_EDGE (e, ei, first_bb->succs) + if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) + || e->dest == first_bb) + return; + else if (single_pred_p (e->dest)) + { + stmt = last_stmt (e->dest); + if (stmt + && gimple_code (stmt) == GIMPLE_COND + && EDGE_COUNT (e->dest->succs) == 2) + { + if (suitable_cond_bb (first_bb, e->dest, &other_bb, true)) + break; + else + other_bb = NULL; + } + else if (stmt + && final_range_test_p (stmt) + && find_edge (first_bb, single_succ (e->dest))) + { + other_bb = single_succ (e->dest); + if (other_bb == first_bb) + other_bb = NULL; + } + } + if (other_bb == NULL) + return; + } + /* Now do the forward search, moving last_bb to successor bbs + that aren't other_bb. */ + while (EDGE_COUNT (last_bb->succs) == 2) + { + FOR_EACH_EDGE (e, ei, last_bb->succs) + if (e->dest != other_bb) + break; + if (e == NULL) + break; + if (!single_pred_p (e->dest)) + break; + if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false)) + break; + if (!no_side_effect_bb (e->dest)) + break; + last_bb = e->dest; + } + if (first_bb == last_bb) + return; + /* Here basic blocks first_bb through last_bb's predecessor + end with GIMPLE_COND, all of them have one of the edges to + other_bb and another to another block in the range, + all blocks except first_bb don't have side-effects and + last_bb ends with either GIMPLE_COND, or cast satisfying + final_range_test_p. 
*/ + for (bb = last_bb; ; bb = single_pred (bb)) + { + enum tree_code code; + tree lhs, rhs; + inter_bb_range_test_entry bb_ent; + + bb_ent.op = NULL_TREE; + bb_ent.first_idx = ops.length (); + bb_ent.last_idx = bb_ent.first_idx; + e = find_edge (bb, other_bb); + stmt = last_stmt (bb); + gimple_set_visited (stmt, true); + if (gimple_code (stmt) != GIMPLE_COND) + { + use_operand_p use_p; + gimple phi; + edge e2; + unsigned int d; + + lhs = gimple_assign_lhs (stmt); + rhs = gimple_assign_rhs1 (stmt); + gcc_assert (bb == last_bb); + + /* stmt is + _123 = (int) _234; + + followed by: + <bb M>: + # _345 = PHI <_123(N), 1(...), 1(...)> + + or 0 instead of 1. If it is 0, the _234 + range test is anded together with all the + other range tests, if it is 1, it is ored with + them. */ + single_imm_use (lhs, &use_p, &phi); + gcc_assert (gimple_code (phi) == GIMPLE_PHI); + e2 = find_edge (first_bb, other_bb); + d = e2->dest_idx; + gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); + if (integer_zerop (gimple_phi_arg_def (phi, d))) + code = BIT_AND_EXPR; + else + { + gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); + code = BIT_IOR_EXPR; + } + + /* If _234 SSA_NAME_DEF_STMT is + _234 = _567 | _789; + (or &, corresponding to 1/0 in the phi arguments, + push into ops the individual range test arguments + of the bitwise or resp. and, recursively. */ + if (!get_ops (rhs, code, &ops, + loop_containing_stmt (stmt)) + && has_single_use (rhs)) + { + /* Otherwise, push the _234 range test itself. */ + operand_entry_t oe + = (operand_entry_t) pool_alloc (operand_entry_pool); + + oe->op = rhs; + oe->rank = code; + oe->id = 0; + oe->count = 1; + ops.safe_push (oe); + bb_ent.last_idx++; + } + else + bb_ent.last_idx = ops.length (); + bb_ent.op = rhs; + bbinfo.safe_push (bb_ent); + continue; + } + /* Otherwise stmt is GIMPLE_COND. */ + code = gimple_cond_code (stmt); + lhs = gimple_cond_lhs (stmt); + rhs = gimple_cond_rhs (stmt); + if (TREE_CODE (lhs) == SSA_NAME + && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) + && ((code != EQ_EXPR && code != NE_EXPR) + || rhs != boolean_false_node + /* Either push into ops the individual bitwise + or resp. and operands, depending on which + edge is other_bb. */ + || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) + ^ (code == EQ_EXPR)) + ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, + loop_containing_stmt (stmt)))) + { + /* Or push the GIMPLE_COND stmt itself. */ + operand_entry_t oe + = (operand_entry_t) pool_alloc (operand_entry_pool); + + oe->op = NULL; + oe->rank = (e->flags & EDGE_TRUE_VALUE) + ? BIT_IOR_EXPR : BIT_AND_EXPR; + /* oe->op = NULL signs that there is no SSA_NAME + for the range test, and oe->id instead is the + basic block number, at which's end the GIMPLE_COND + is. */ + oe->id = bb->index; + oe->count = 1; + ops.safe_push (oe); + bb_ent.op = NULL; + bb_ent.last_idx++; + } + else if (ops.length () > bb_ent.first_idx) + { + bb_ent.op = lhs; + bb_ent.last_idx = ops.length (); + } + bbinfo.safe_push (bb_ent); + if (bb == first_bb) + break; + } + if (ops.length () > 1) + any_changes = optimize_range_tests (ERROR_MARK, &ops); + if (any_changes) + { + unsigned int idx; + /* update_ops relies on has_single_use predicates returning the + same values as it did during get_ops earlier. Additionally it + never removes statements, only adds new ones and it should walk + from the single imm use and check the predicate already before + making those changes. 
+ On the other side, the handling of GIMPLE_COND directly can turn + previously multiply used SSA_NAMEs into single use SSA_NAMEs, so + it needs to be done in a separate loop afterwards. */ + for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) + { + if (bbinfo[idx].first_idx < bbinfo[idx].last_idx + && bbinfo[idx].op != NULL_TREE) + { + tree new_op; + + stmt = last_stmt (bb); + new_op = update_ops (bbinfo[idx].op, + (enum tree_code) + ops[bbinfo[idx].first_idx]->rank, + ops, &bbinfo[idx].first_idx, + loop_containing_stmt (stmt)); + if (new_op == NULL_TREE) + { + gcc_assert (bb == last_bb); + new_op = ops[bbinfo[idx].first_idx++]->op; + } + if (bbinfo[idx].op != new_op) + { + imm_use_iterator iter; + use_operand_p use_p; + gimple use_stmt, cast_stmt = NULL; + + FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op) + if (is_gimple_debug (use_stmt)) + continue; + else if (gimple_code (use_stmt) == GIMPLE_COND + || gimple_code (use_stmt) == GIMPLE_PHI) + FOR_EACH_IMM_USE_ON_STMT (use_p, iter) + SET_USE (use_p, new_op); + else if (gimple_assign_cast_p (use_stmt)) + cast_stmt = use_stmt; + else + gcc_unreachable (); + if (cast_stmt) + { + gcc_assert (bb == last_bb); + tree lhs = gimple_assign_lhs (cast_stmt); + tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL); + enum tree_code rhs_code + = gimple_assign_rhs_code (cast_stmt); + gimple g; + if (is_gimple_min_invariant (new_op)) + { + new_op = fold_convert (TREE_TYPE (lhs), new_op); + g = gimple_build_assign (new_lhs, new_op); + } + else + g = gimple_build_assign_with_ops (rhs_code, new_lhs, + new_op, NULL_TREE); + gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt); + gimple_set_uid (g, gimple_uid (cast_stmt)); + gimple_set_visited (g, true); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) + if (is_gimple_debug (use_stmt)) + continue; + else if (gimple_code (use_stmt) == GIMPLE_COND + || gimple_code (use_stmt) == GIMPLE_PHI) + FOR_EACH_IMM_USE_ON_STMT (use_p, iter) + SET_USE (use_p, new_lhs); + else + gcc_unreachable (); + } + } + } + if (bb == first_bb) + break; + } + for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) + { + if (bbinfo[idx].first_idx < bbinfo[idx].last_idx + && bbinfo[idx].op == NULL_TREE + && ops[bbinfo[idx].first_idx]->op != NULL_TREE) + { + stmt = last_stmt (bb); + if (integer_zerop (ops[bbinfo[idx].first_idx]->op)) + gimple_cond_make_false (stmt); + else if (integer_onep (ops[bbinfo[idx].first_idx]->op)) + gimple_cond_make_true (stmt); + else + { + gimple_cond_set_code (stmt, NE_EXPR); + gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op); + gimple_cond_set_rhs (stmt, boolean_false_node); + } + update_stmt (stmt); + } + if (bb == first_bb) + break; + } + } +} + +/* Return true if OPERAND is defined by a PHI node which uses the LHS + of STMT in it's operands. This is also known as a "destructive + update" operation. */ + +static bool +is_phi_for_stmt (gimple stmt, tree operand) +{ + gimple def_stmt; + tree lhs; + use_operand_p arg_p; + ssa_op_iter i; + + if (TREE_CODE (operand) != SSA_NAME) + return false; + + lhs = gimple_assign_lhs (stmt); + + def_stmt = SSA_NAME_DEF_STMT (operand); + if (gimple_code (def_stmt) != GIMPLE_PHI) + return false; + + FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE) + if (lhs == USE_FROM_PTR (arg_p)) + return true; + return false; +} + +/* Remove def stmt of VAR if VAR has zero uses and recurse + on rhs1 operand if so. 
*/ + +static void +remove_visited_stmt_chain (tree var) +{ + gimple stmt; + gimple_stmt_iterator gsi; + + while (1) + { + if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) + return; + stmt = SSA_NAME_DEF_STMT (var); + if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) + { + var = gimple_assign_rhs1 (stmt); + gsi = gsi_for_stmt (stmt); + gsi_remove (&gsi, true); + release_defs (stmt); + } + else + return; + } +} + +/* This function checks three consecutive operands in + passed operands vector OPS starting from OPINDEX and + swaps two operands if it is profitable for the binary operation + consuming OPINDEX + 1 and OPINDEX + 2 operands. + + We pair ops with the same rank if possible. + + The alternative we try is to see if STMT is a destructive + update style statement, which is like: + b = phi (a, ...) + a = c + b; + In that case, we want to use the destructive update form to + expose the possible vectorizer sum reduction opportunity. + In that case, the third operand will be the phi node. This + check is not performed if STMT is null. + + We could, of course, try to be better as noted above, and do a + lot of work to try to find these opportunities in >3 operand + cases, but it is unlikely to be worth it. */ + +static void +swap_ops_for_binary_stmt (vec<operand_entry_t> ops, + unsigned int opindex, gimple stmt) +{ + operand_entry_t oe1, oe2, oe3; + + oe1 = ops[opindex]; + oe2 = ops[opindex + 1]; + oe3 = ops[opindex + 2]; + + if ((oe1->rank == oe2->rank + && oe2->rank != oe3->rank) + || (stmt && is_phi_for_stmt (stmt, oe3->op) + && !is_phi_for_stmt (stmt, oe1->op) + && !is_phi_for_stmt (stmt, oe2->op))) + { + struct operand_entry temp = *oe3; + oe3->op = oe1->op; + oe3->rank = oe1->rank; + oe1->op = temp.op; + oe1->rank= temp.rank; + } + else if ((oe1->rank == oe3->rank + && oe2->rank != oe3->rank) + || (stmt && is_phi_for_stmt (stmt, oe2->op) + && !is_phi_for_stmt (stmt, oe1->op) + && !is_phi_for_stmt (stmt, oe3->op))) + { + struct operand_entry temp = *oe2; + oe2->op = oe1->op; + oe2->rank = oe1->rank; + oe1->op = temp.op; + oe1->rank = temp.rank; + } +} + +/* If definition of RHS1 or RHS2 dominates STMT, return the later of those + two definitions, otherwise return STMT. */ + +static inline gimple +find_insert_point (gimple stmt, tree rhs1, tree rhs2) +{ + if (TREE_CODE (rhs1) == SSA_NAME + && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1))) + stmt = SSA_NAME_DEF_STMT (rhs1); + if (TREE_CODE (rhs2) == SSA_NAME + && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2))) + stmt = SSA_NAME_DEF_STMT (rhs2); + return stmt; +} + +/* Recursively rewrite our linearized statements so that the operators + match those in OPS[OPINDEX], putting the computation in rank + order. Return new lhs. */ + +static tree +rewrite_expr_tree (gimple stmt, unsigned int opindex, + vec<operand_entry_t> ops, bool changed) +{ + tree rhs1 = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 (stmt); + tree lhs = gimple_assign_lhs (stmt); + operand_entry_t oe; + + /* The final recursion case for this function is that you have + exactly two operations left. + If we had exactly one op in the entire list to start with, we + would have never called this function, and the tail recursion + rewrites them one at a time. 
*/ + if (opindex + 2 == ops.length ()) + { + operand_entry_t oe1, oe2; + + oe1 = ops[opindex]; + oe2 = ops[opindex + 1]; + + if (rhs1 != oe1->op || rhs2 != oe2->op) + { + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + unsigned int uid = gimple_uid (stmt); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Transforming "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + + if (changed) + { + gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op); + lhs = make_ssa_name (TREE_TYPE (lhs), NULL); + stmt + = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), + lhs, oe1->op, oe2->op); + gimple_set_uid (stmt, uid); + gimple_set_visited (stmt, true); + if (insert_point == gsi_stmt (gsi)) + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); + else + insert_stmt_after (stmt, insert_point); + } + else + { + gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op) + == stmt); + gimple_assign_set_rhs1 (stmt, oe1->op); + gimple_assign_set_rhs2 (stmt, oe2->op); + update_stmt (stmt); + } + + if (rhs1 != oe1->op && rhs1 != oe2->op) + remove_visited_stmt_chain (rhs1); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " into "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + } + return lhs; + } + + /* If we hit here, we should have 3 or more ops left. */ + gcc_assert (opindex + 2 < ops.length ()); + + /* Rewrite the next operator. */ + oe = ops[opindex]; + + /* Recurse on the LHS of the binary operator, which is guaranteed to + be the non-leaf side. */ + tree new_rhs1 + = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, + changed || oe->op != rhs2); + + if (oe->op != rhs2 || new_rhs1 != rhs1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Transforming "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + + /* If changed is false, this is either opindex == 0 + or all outer rhs2's were equal to corresponding oe->op, + and powi_result is NULL. + That means lhs is equivalent before and after reassociation. + Otherwise ensure the old lhs SSA_NAME is not reused and + create a new stmt as well, so that any debug stmts will be + properly adjusted. */ + if (changed) + { + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + unsigned int uid = gimple_uid (stmt); + gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op); + + lhs = make_ssa_name (TREE_TYPE (lhs), NULL); + stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), + lhs, new_rhs1, oe->op); + gimple_set_uid (stmt, uid); + gimple_set_visited (stmt, true); + if (insert_point == gsi_stmt (gsi)) + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); + else + insert_stmt_after (stmt, insert_point); + } + else + { + gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op) + == stmt); + gimple_assign_set_rhs1 (stmt, new_rhs1); + gimple_assign_set_rhs2 (stmt, oe->op); + update_stmt (stmt); + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " into "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + } + return lhs; +} + +/* Find out how many cycles we need to compute statements chain. + OPS_NUM holds number os statements in a chain. CPU_WIDTH is a + maximum number of independent statements we may execute per cycle. */ + +static int +get_required_cycles (int ops_num, int cpu_width) +{ + int res; + int elog; + unsigned int rest; + + /* While we have more than 2 * cpu_width operands + we may reduce number of operands by cpu_width + per cycle. 
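+     E.g. for OPS_NUM == 8 and CPU_WIDTH == 2 this step accounts for
+     8 / 4 == 2 cycles; the remaining 4 operands are then halved each
+     cycle below, adding exact_log2 (4) == 2 cycles, 4 in total.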
*/ + res = ops_num / (2 * cpu_width); + + /* Remained operands count may be reduced twice per cycle + until we have only one operand. */ + rest = (unsigned)(ops_num - res * cpu_width); + elog = exact_log2 (rest); + if (elog >= 0) + res += elog; + else + res += floor_log2 (rest) + 1; + + return res; +} + +/* Returns an optimal number of registers to use for computation of + given statements. */ + +static int +get_reassociation_width (int ops_num, enum tree_code opc, + enum machine_mode mode) +{ + int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); + int width; + int width_min; + int cycles_best; + + if (param_width > 0) + width = param_width; + else + width = targetm.sched.reassociation_width (opc, mode); + + if (width == 1) + return width; + + /* Get the minimal time required for sequence computation. */ + cycles_best = get_required_cycles (ops_num, width); + + /* Check if we may use less width and still compute sequence for + the same time. It will allow us to reduce registers usage. + get_required_cycles is monotonically increasing with lower width + so we can perform a binary search for the minimal width that still + results in the optimal cycle count. */ + width_min = 1; + while (width > width_min) + { + int width_mid = (width + width_min) / 2; + + if (get_required_cycles (ops_num, width_mid) == cycles_best) + width = width_mid; + else if (width_min < width_mid) + width_min = width_mid; + else + break; + } + + return width; +} + +/* Recursively rewrite our linearized statements so that the operators + match those in OPS[OPINDEX], putting the computation in rank + order and trying to allow operations to be executed in + parallel. */ + +static void +rewrite_expr_tree_parallel (gimple stmt, int width, + vec<operand_entry_t> ops) +{ + enum tree_code opcode = gimple_assign_rhs_code (stmt); + int op_num = ops.length (); + int stmt_num = op_num - 1; + gimple *stmts = XALLOCAVEC (gimple, stmt_num); + int op_index = op_num - 1; + int stmt_index = 0; + int ready_stmts_end = 0; + int i = 0; + tree last_rhs1 = gimple_assign_rhs1 (stmt); + + /* We start expression rewriting from the top statements. + So, in this loop we create a full list of statements + we will work with. */ + stmts[stmt_num - 1] = stmt; + for (i = stmt_num - 2; i >= 0; i--) + stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); + + for (i = 0; i < stmt_num; i++) + { + tree op1, op2; + + /* Determine whether we should use results of + already handled statements or not. */ + if (ready_stmts_end == 0 + && (i - stmt_index >= width || op_index < 1)) + ready_stmts_end = i; + + /* Now we choose operands for the next statement. Non zero + value in ready_stmts_end means here that we should use + the result of already generated statements as new operand. */ + if (ready_stmts_end > 0) + { + op1 = gimple_assign_lhs (stmts[stmt_index++]); + if (ready_stmts_end > stmt_index) + op2 = gimple_assign_lhs (stmts[stmt_index++]); + else if (op_index >= 0) + op2 = ops[op_index--]->op; + else + { + gcc_assert (stmt_index < i); + op2 = gimple_assign_lhs (stmts[stmt_index++]); + } + + if (stmt_index >= ready_stmts_end) + ready_stmts_end = 0; + } + else + { + if (op_index > 1) + swap_ops_for_binary_stmt (ops, op_index - 2, NULL); + op2 = ops[op_index--]->op; + op1 = ops[op_index--]->op; + } + + /* If we emit the last statement then we should put + operands into the last statement. It will also + break the loop. 
*/ + if (op_index < 0 && stmt_index == i) + i = stmt_num - 1; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Transforming "); + print_gimple_stmt (dump_file, stmts[i], 0, 0); + } + + /* We keep original statement only for the last one. All + others are recreated. */ + if (i == stmt_num - 1) + { + gimple_assign_set_rhs1 (stmts[i], op1); + gimple_assign_set_rhs2 (stmts[i], op2); + update_stmt (stmts[i]); + } + else + stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " into "); + print_gimple_stmt (dump_file, stmts[i], 0, 0); + } + } + + remove_visited_stmt_chain (last_rhs1); +} + +/* Transform STMT, which is really (A +B) + (C + D) into the left + linear form, ((A+B)+C)+D. + Recurse on D if necessary. */ + +static void +linearize_expr (gimple stmt) +{ + gimple_stmt_iterator gsi; + gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); + gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); + gimple oldbinrhs = binrhs; + enum tree_code rhscode = gimple_assign_rhs_code (stmt); + gimple newbinrhs = NULL; + struct loop *loop = loop_containing_stmt (stmt); + tree lhs = gimple_assign_lhs (stmt); + + gcc_assert (is_reassociable_op (binlhs, rhscode, loop) + && is_reassociable_op (binrhs, rhscode, loop)); + + gsi = gsi_for_stmt (stmt); + + gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); + binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs), + make_ssa_name (TREE_TYPE (lhs), NULL), + gimple_assign_lhs (binlhs), + gimple_assign_rhs2 (binrhs)); + gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); + gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT); + gimple_set_uid (binrhs, gimple_uid (stmt)); + + if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) + newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Linearized: "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + + reassociate_stats.linearized++; + update_stmt (stmt); + + gsi = gsi_for_stmt (oldbinrhs); + gsi_remove (&gsi, true); + release_defs (oldbinrhs); + + gimple_set_visited (stmt, true); + gimple_set_visited (binlhs, true); + gimple_set_visited (binrhs, true); + + /* Tail recurse on the new rhs if it still needs reassociation. */ + if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) + /* ??? This should probably be linearize_expr (newbinrhs) but I don't + want to change the algorithm while converting to tuples. */ + linearize_expr (stmt); +} + +/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return + it. Otherwise, return NULL. */ + +static gimple +get_single_immediate_use (tree lhs) +{ + use_operand_p immuse; + gimple immusestmt; + + if (TREE_CODE (lhs) == SSA_NAME + && single_imm_use (lhs, &immuse, &immusestmt) + && is_gimple_assign (immusestmt)) + return immusestmt; + + return NULL; +} + +/* Recursively negate the value of TONEGATE, and return the SSA_NAME + representing the negated value. Insertions of any necessary + instructions go before GSI. + This function is recursive in that, if you hand it "a_5" as the + value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will + transform b_3 + b_4 into a_5 = -b_3 + -b_4. 
*/ + +static tree +negate_value (tree tonegate, gimple_stmt_iterator *gsip) +{ + gimple negatedefstmt = NULL; + tree resultofnegate; + gimple_stmt_iterator gsi; + unsigned int uid; + + /* If we are trying to negate a name, defined by an add, negate the + add operands instead. */ + if (TREE_CODE (tonegate) == SSA_NAME) + negatedefstmt = SSA_NAME_DEF_STMT (tonegate); + if (TREE_CODE (tonegate) == SSA_NAME + && is_gimple_assign (negatedefstmt) + && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME + && has_single_use (gimple_assign_lhs (negatedefstmt)) + && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR) + { + tree rhs1 = gimple_assign_rhs1 (negatedefstmt); + tree rhs2 = gimple_assign_rhs2 (negatedefstmt); + tree lhs = gimple_assign_lhs (negatedefstmt); + gimple g; + + gsi = gsi_for_stmt (negatedefstmt); + rhs1 = negate_value (rhs1, &gsi); + + gsi = gsi_for_stmt (negatedefstmt); + rhs2 = negate_value (rhs2, &gsi); + + gsi = gsi_for_stmt (negatedefstmt); + lhs = make_ssa_name (TREE_TYPE (lhs), NULL); + gimple_set_visited (negatedefstmt, true); + g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2); + gimple_set_uid (g, gimple_uid (negatedefstmt)); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + return lhs; + } + + tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); + resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true, + NULL_TREE, true, GSI_SAME_STMT); + gsi = *gsip; + uid = gimple_uid (gsi_stmt (gsi)); + for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + if (gimple_uid (stmt) != 0) + break; + gimple_set_uid (stmt, uid); + } + return resultofnegate; +} + +/* Return true if we should break up the subtract in STMT into an add + with negate. This is true when we the subtract operands are really + adds, or the subtract itself is used in an add expression. In + either case, breaking up the subtract into an add with negate + exposes the adds to reassociation. */ + +static bool +should_break_up_subtract (gimple stmt) +{ + tree lhs = gimple_assign_lhs (stmt); + tree binlhs = gimple_assign_rhs1 (stmt); + tree binrhs = gimple_assign_rhs2 (stmt); + gimple immusestmt; + struct loop *loop = loop_containing_stmt (stmt); + + if (TREE_CODE (binlhs) == SSA_NAME + && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) + return true; + + if (TREE_CODE (binrhs) == SSA_NAME + && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) + return true; + + if (TREE_CODE (lhs) == SSA_NAME + && (immusestmt = get_single_immediate_use (lhs)) + && is_gimple_assign (immusestmt) + && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR + || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) + return true; + return false; +} + +/* Transform STMT from A - B into A + -B. */ + +static void +break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip) +{ + tree rhs1 = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 (stmt); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Breaking up subtract "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + + rhs2 = negate_value (rhs2, gsip); + gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); + update_stmt (stmt); +} + +/* Determine whether STMT is a builtin call that raises an SSA name + to an integer power and has only one use. If so, and this is early + reassociation and unsafe math optimizations are permitted, place + the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE. 
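+   E.g. for __builtin_pow (x_3, 4.0) this sets *BASE to x_3 and
+   *EXPONENT to 4.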
+ If any of these conditions does not hold, return FALSE. */ + +static bool +acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent) +{ + tree fndecl, arg1; + REAL_VALUE_TYPE c, cint; + + if (!first_pass_instance + || !flag_unsafe_math_optimizations + || !is_gimple_call (stmt) + || !has_single_use (gimple_call_lhs (stmt))) + return false; + + fndecl = gimple_call_fndecl (stmt); + + if (!fndecl + || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) + return false; + + switch (DECL_FUNCTION_CODE (fndecl)) + { + CASE_FLT_FN (BUILT_IN_POW): + *base = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + + if (TREE_CODE (arg1) != REAL_CST) + return false; + + c = TREE_REAL_CST (arg1); + + if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) + return false; + + *exponent = real_to_integer (&c); + real_from_integer (&cint, VOIDmode, *exponent, + *exponent < 0 ? -1 : 0, 0); + if (!real_identical (&c, &cint)) + return false; + + break; + + CASE_FLT_FN (BUILT_IN_POWI): + *base = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + + if (!tree_fits_shwi_p (arg1)) + return false; + + *exponent = tree_to_shwi (arg1); + break; + + default: + return false; + } + + /* Expanding negative exponents is generally unproductive, so we don't + complicate matters with those. Exponents of zero and one should + have been handled by expression folding. */ + if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) + return false; + + return true; +} + +/* Recursively linearize a binary expression that is the RHS of STMT. + Place the operands of the expression tree in the vector named OPS. */ + +static void +linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt, + bool is_associative, bool set_visited) +{ + tree binlhs = gimple_assign_rhs1 (stmt); + tree binrhs = gimple_assign_rhs2 (stmt); + gimple binlhsdef = NULL, binrhsdef = NULL; + bool binlhsisreassoc = false; + bool binrhsisreassoc = false; + enum tree_code rhscode = gimple_assign_rhs_code (stmt); + struct loop *loop = loop_containing_stmt (stmt); + tree base = NULL_TREE; + HOST_WIDE_INT exponent = 0; + + if (set_visited) + gimple_set_visited (stmt, true); + + if (TREE_CODE (binlhs) == SSA_NAME) + { + binlhsdef = SSA_NAME_DEF_STMT (binlhs); + binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) + && !stmt_could_throw_p (binlhsdef)); + } + + if (TREE_CODE (binrhs) == SSA_NAME) + { + binrhsdef = SSA_NAME_DEF_STMT (binrhs); + binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) + && !stmt_could_throw_p (binrhsdef)); + } + + /* If the LHS is not reassociable, but the RHS is, we need to swap + them. If neither is reassociable, there is nothing we can do, so + just put them in the ops vector. If the LHS is reassociable, + linearize it. If both are reassociable, then linearize the RHS + and the LHS. */ + + if (!binlhsisreassoc) + { + tree temp; + + /* If this is not a associative operation like division, give up. 
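+	 ((a / b) / c is not the same as a / (b / c), so such operands
+	 must not be regrouped.)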
*/ + if (!is_associative) + { + add_to_ops_vec (ops, binrhs); + return; + } + + if (!binrhsisreassoc) + { + if (rhscode == MULT_EXPR + && TREE_CODE (binrhs) == SSA_NAME + && acceptable_pow_call (binrhsdef, &base, &exponent)) + { + add_repeat_to_ops_vec (ops, base, exponent); + gimple_set_visited (binrhsdef, true); + } + else + add_to_ops_vec (ops, binrhs); + + if (rhscode == MULT_EXPR + && TREE_CODE (binlhs) == SSA_NAME + && acceptable_pow_call (binlhsdef, &base, &exponent)) + { + add_repeat_to_ops_vec (ops, base, exponent); + gimple_set_visited (binlhsdef, true); + } + else + add_to_ops_vec (ops, binlhs); + + return; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "swapping operands of "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + + swap_ssa_operands (stmt, + gimple_assign_rhs1_ptr (stmt), + gimple_assign_rhs2_ptr (stmt)); + update_stmt (stmt); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " is now "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + + /* We want to make it so the lhs is always the reassociative op, + so swap. */ + temp = binlhs; + binlhs = binrhs; + binrhs = temp; + } + else if (binrhsisreassoc) + { + linearize_expr (stmt); + binlhs = gimple_assign_rhs1 (stmt); + binrhs = gimple_assign_rhs2 (stmt); + } + + gcc_assert (TREE_CODE (binrhs) != SSA_NAME + || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), + rhscode, loop)); + linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), + is_associative, set_visited); + + if (rhscode == MULT_EXPR + && TREE_CODE (binrhs) == SSA_NAME + && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent)) + { + add_repeat_to_ops_vec (ops, base, exponent); + gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true); + } + else + add_to_ops_vec (ops, binrhs); +} + +/* Repropagate the negates back into subtracts, since no other pass + currently does it. */ + +static void +repropagate_negates (void) +{ + unsigned int i = 0; + tree negate; + + FOR_EACH_VEC_ELT (plus_negates, i, negate) + { + gimple user = get_single_immediate_use (negate); + + if (!user || !is_gimple_assign (user)) + continue; + + /* The negate operand can be either operand of a PLUS_EXPR + (it can be the LHS if the RHS is a constant for example). + + Force the negate operand to the RHS of the PLUS_EXPR, then + transform the PLUS_EXPR into a MINUS_EXPR. */ + if (gimple_assign_rhs_code (user) == PLUS_EXPR) + { + /* If the negated operand appears on the LHS of the + PLUS_EXPR, exchange the operands of the PLUS_EXPR + to force the negated operand to the RHS of the PLUS_EXPR. */ + if (gimple_assign_rhs1 (user) == negate) + { + swap_ssa_operands (user, + gimple_assign_rhs1_ptr (user), + gimple_assign_rhs2_ptr (user)); + } + + /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace + the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */ + if (gimple_assign_rhs2 (user) == negate) + { + tree rhs1 = gimple_assign_rhs1 (user); + tree rhs2 = get_unary_op (negate, NEGATE_EXPR); + gimple_stmt_iterator gsi = gsi_for_stmt (user); + gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2); + update_stmt (user); + } + } + else if (gimple_assign_rhs_code (user) == MINUS_EXPR) + { + if (gimple_assign_rhs1 (user) == negate) + { + /* We have + x = -a + y = x - b + which we transform into + x = a + b + y = -x . + This pushes down the negate which we possibly can merge + into some other operation, hence insert it into the + plus_negates vector. 
*/ + gimple feed = SSA_NAME_DEF_STMT (negate); + tree a = gimple_assign_rhs1 (feed); + tree b = gimple_assign_rhs2 (user); + gimple_stmt_iterator gsi = gsi_for_stmt (feed); + gimple_stmt_iterator gsi2 = gsi_for_stmt (user); + tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL); + gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b); + gsi_insert_before (&gsi2, g, GSI_SAME_STMT); + gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL); + user = gsi_stmt (gsi2); + update_stmt (user); + gsi_remove (&gsi, true); + release_defs (feed); + plus_negates.safe_push (gimple_assign_lhs (user)); + } + else + { + /* Transform "x = -a; y = b - x" into "y = b + a", getting + rid of one operation. */ + gimple feed = SSA_NAME_DEF_STMT (negate); + tree a = gimple_assign_rhs1 (feed); + tree rhs1 = gimple_assign_rhs1 (user); + gimple_stmt_iterator gsi = gsi_for_stmt (user); + gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a); + update_stmt (gsi_stmt (gsi)); + } + } + } +} + +/* Returns true if OP is of a type for which we can do reassociation. + That is for integral or non-saturating fixed-point types, and for + floating point type when associative-math is enabled. */ + +static bool +can_reassociate_p (tree op) +{ + tree type = TREE_TYPE (op); + if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) + || NON_SAT_FIXED_POINT_TYPE_P (type) + || (flag_associative_math && FLOAT_TYPE_P (type))) + return true; + return false; +} + +/* Break up subtract operations in block BB. + + We do this top down because we don't know whether the subtract is + part of a possible chain of reassociation except at the top. + + IE given + d = f + g + c = a + e + b = c - d + q = b - r + k = t - q + + we want to break up k = t - q, but we won't until we've transformed q + = b - r, which won't be broken up until we transform b = c - d. + + En passant, clear the GIMPLE visited flag on every statement + and set UIDs within each basic block. */ + +static void +break_up_subtract_bb (basic_block bb) +{ + gimple_stmt_iterator gsi; + basic_block son; + unsigned int uid = 1; + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + gimple_set_visited (stmt, false); + gimple_set_uid (stmt, uid++); + + if (!is_gimple_assign (stmt) + || !can_reassociate_p (gimple_assign_lhs (stmt))) + continue; + + /* Look for simple gimple subtract operations. */ + if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) + { + if (!can_reassociate_p (gimple_assign_rhs1 (stmt)) + || !can_reassociate_p (gimple_assign_rhs2 (stmt))) + continue; + + /* Check for a subtract used only in an addition. If this + is the case, transform it into add of a negate for better + reassociation. IE transform C = A-B into C = A + -B if C + is only used in an addition. */ + if (should_break_up_subtract (stmt)) + break_up_subtract (stmt, &gsi); + } + else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR + && can_reassociate_p (gimple_assign_rhs1 (stmt))) + plus_negates.safe_push (gimple_assign_lhs (stmt)); + } + for (son = first_dom_son (CDI_DOMINATORS, bb); + son; + son = next_dom_son (CDI_DOMINATORS, son)) + break_up_subtract_bb (son); +} + +/* Used for repeated factor analysis. */ +struct repeat_factor_d +{ + /* An SSA name that occurs in a multiply chain. */ + tree factor; + + /* Cached rank of the factor. */ + unsigned rank; + + /* Number of occurrences of the factor in the chain. 
*/ + HOST_WIDE_INT count; + + /* An SSA name representing the product of this factor and + all factors appearing later in the repeated factor vector. */ + tree repr; +}; + +typedef struct repeat_factor_d repeat_factor, *repeat_factor_t; +typedef const struct repeat_factor_d *const_repeat_factor_t; + + +static vec<repeat_factor> repeat_factor_vec; + +/* Used for sorting the repeat factor vector. Sort primarily by + ascending occurrence count, secondarily by descending rank. */ + +static int +compare_repeat_factors (const void *x1, const void *x2) +{ + const_repeat_factor_t rf1 = (const_repeat_factor_t) x1; + const_repeat_factor_t rf2 = (const_repeat_factor_t) x2; + + if (rf1->count != rf2->count) + return rf1->count - rf2->count; + + return rf2->rank - rf1->rank; +} + +/* Look for repeated operands in OPS in the multiply tree rooted at + STMT. Replace them with an optimal sequence of multiplies and powi + builtin calls, and remove the used operands from OPS. Return an + SSA name representing the value of the replacement sequence. */ + +static tree +attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops) +{ + unsigned i, j, vec_len; + int ii; + operand_entry_t oe; + repeat_factor_t rf1, rf2; + repeat_factor rfnew; + tree result = NULL_TREE; + tree target_ssa, iter_result; + tree type = TREE_TYPE (gimple_get_lhs (stmt)); + tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gimple mul_stmt, pow_stmt; + + /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and + target. */ + if (!powi_fndecl) + return NULL_TREE; + + /* Allocate the repeated factor vector. */ + repeat_factor_vec.create (10); + + /* Scan the OPS vector for all SSA names in the product and build + up a vector of occurrence counts for each factor. */ + FOR_EACH_VEC_ELT (*ops, i, oe) + { + if (TREE_CODE (oe->op) == SSA_NAME) + { + FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) + { + if (rf1->factor == oe->op) + { + rf1->count += oe->count; + break; + } + } + + if (j >= repeat_factor_vec.length ()) + { + rfnew.factor = oe->op; + rfnew.rank = oe->rank; + rfnew.count = oe->count; + rfnew.repr = NULL_TREE; + repeat_factor_vec.safe_push (rfnew); + } + } + } + + /* Sort the repeated factor vector by (a) increasing occurrence count, + and (b) decreasing rank. */ + repeat_factor_vec.qsort (compare_repeat_factors); + + /* It is generally best to combine as many base factors as possible + into a product before applying __builtin_powi to the result. + However, the sort order chosen for the repeated factor vector + allows us to cache partial results for the product of the base + factors for subsequent use. When we already have a cached partial + result from a previous iteration, it is best to make use of it + before looking for another __builtin_pow opportunity. + + As an example, consider x * x * y * y * y * z * z * z * z. + We want to first compose the product x * y * z, raise it to the + second power, then multiply this by y * z, and finally multiply + by z. This can be done in 5 multiplies provided we cache y * z + for use in both expressions: + + t1 = y * z + t2 = t1 * x + t3 = t2 * t2 + t4 = t1 * t3 + result = t4 * z + + If we instead ignored the cached y * z and first multiplied by + the __builtin_pow opportunity z * z, we would get the inferior: + + t1 = y * z + t2 = t1 * x + t3 = t2 * t2 + t4 = z * z + t5 = t3 * t4 + result = t5 * y */ + + vec_len = repeat_factor_vec.length (); + + /* Repeatedly look for opportunities to create a builtin_powi call. 
*/ + while (true) + { + HOST_WIDE_INT power; + + /* First look for the largest cached product of factors from + preceding iterations. If found, create a builtin_powi for + it if the minimum occurrence count for its factors is at + least 2, or just use this cached product as our next + multiplicand if the minimum occurrence count is 1. */ + FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) + { + if (rf1->repr && rf1->count > 0) + break; + } + + if (j < vec_len) + { + power = rf1->count; + + if (power == 1) + { + iter_result = rf1->repr; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + unsigned elt; + repeat_factor_t rf; + fputs ("Multiplying by cached product ", dump_file); + for (elt = j; elt < vec_len; elt++) + { + rf = &repeat_factor_vec[elt]; + print_generic_expr (dump_file, rf->factor, 0); + if (elt < vec_len - 1) + fputs (" * ", dump_file); + } + fputs ("\n", dump_file); + } + } + else + { + iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); + pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, + build_int_cst (integer_type_node, + power)); + gimple_call_set_lhs (pow_stmt, iter_result); + gimple_set_location (pow_stmt, gimple_location (stmt)); + gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + unsigned elt; + repeat_factor_t rf; + fputs ("Building __builtin_pow call for cached product (", + dump_file); + for (elt = j; elt < vec_len; elt++) + { + rf = &repeat_factor_vec[elt]; + print_generic_expr (dump_file, rf->factor, 0); + if (elt < vec_len - 1) + fputs (" * ", dump_file); + } + fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", + power); + } + } + } + else + { + /* Otherwise, find the first factor in the repeated factor + vector whose occurrence count is at least 2. If no such + factor exists, there are no builtin_powi opportunities + remaining. */ + FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) + { + if (rf1->count >= 2) + break; + } + + if (j >= vec_len) + break; + + power = rf1->count; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + unsigned elt; + repeat_factor_t rf; + fputs ("Building __builtin_pow call for (", dump_file); + for (elt = j; elt < vec_len; elt++) + { + rf = &repeat_factor_vec[elt]; + print_generic_expr (dump_file, rf->factor, 0); + if (elt < vec_len - 1) + fputs (" * ", dump_file); + } + fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power); + } + + reassociate_stats.pows_created++; + + /* Visit each element of the vector in reverse order (so that + high-occurrence elements are visited first, and within the + same occurrence count, lower-ranked elements are visited + first). Form a linear product of all elements in this order + whose occurrencce count is at least that of element J. + Record the SSA name representing the product of each element + with all subsequent elements in the vector. */ + if (j == vec_len - 1) + rf1->repr = rf1->factor; + else + { + for (ii = vec_len - 2; ii >= (int)j; ii--) + { + tree op1, op2; + + rf1 = &repeat_factor_vec[ii]; + rf2 = &repeat_factor_vec[ii + 1]; + + /* Init the last factor's representative to be itself. */ + if (!rf2->repr) + rf2->repr = rf2->factor; + + op1 = rf1->factor; + op2 = rf2->repr; + + target_ssa = make_temp_ssa_name (type, NULL, "reassocpow"); + mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, + target_ssa, + op1, op2); + gimple_set_location (mul_stmt, gimple_location (stmt)); + gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); + rf1->repr = target_ssa; + + /* Don't reprocess the multiply we just introduced. 
*/ + gimple_set_visited (mul_stmt, true); + } + } + + /* Form a call to __builtin_powi for the maximum product + just formed, raised to the power obtained earlier. */ + rf1 = &repeat_factor_vec[j]; + iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); + pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, + build_int_cst (integer_type_node, + power)); + gimple_call_set_lhs (pow_stmt, iter_result); + gimple_set_location (pow_stmt, gimple_location (stmt)); + gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); + } + + /* If we previously formed at least one other builtin_powi call, + form the product of this one and those others. */ + if (result) + { + tree new_result = make_temp_ssa_name (type, NULL, "reassocpow"); + mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result, + result, iter_result); + gimple_set_location (mul_stmt, gimple_location (stmt)); + gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); + gimple_set_visited (mul_stmt, true); + result = new_result; + } + else + result = iter_result; + + /* Decrement the occurrence count of each element in the product + by the count found above, and remove this many copies of each + factor from OPS. */ + for (i = j; i < vec_len; i++) + { + unsigned k = power; + unsigned n; + + rf1 = &repeat_factor_vec[i]; + rf1->count -= power; + + FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe) + { + if (oe->op == rf1->factor) + { + if (oe->count <= k) + { + ops->ordered_remove (n); + k -= oe->count; + + if (k == 0) + break; + } + else + { + oe->count -= k; + break; + } + } + } + } + } + + /* At this point all elements in the repeated factor vector have a + remaining occurrence count of 0 or 1, and those with a count of 1 + don't have cached representatives. Re-sort the ops vector and + clean up. */ + ops->qsort (sort_by_operand_rank); + repeat_factor_vec.release (); + + /* Return the final product computed herein. Note that there may + still be some elements with single occurrence count left in OPS; + those will be handled by the normal reassociation logic. */ + return result; +} + +/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */ + +static void +transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs) +{ + tree rhs1; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Transforming "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + + rhs1 = gimple_assign_rhs1 (stmt); + gimple_assign_set_rhs_from_tree (gsi, new_rhs); + update_stmt (stmt); + remove_visited_stmt_chain (rhs1); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " into "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } +} + +/* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */ + +static void +transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt, + tree rhs1, tree rhs2) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Transforming "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + + gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2); + update_stmt (gsi_stmt (*gsi)); + remove_visited_stmt_chain (rhs1); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " into "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } +} + +/* Reassociate expressions in basic block BB and its post-dominator as + children. 
*/ + +static void +reassociate_bb (basic_block bb) +{ + gimple_stmt_iterator gsi; + basic_block son; + gimple stmt = last_stmt (bb); + + if (stmt && !gimple_visited_p (stmt)) + maybe_optimize_range_tests (stmt); + + for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) + { + stmt = gsi_stmt (gsi); + + if (is_gimple_assign (stmt) + && !stmt_could_throw_p (stmt)) + { + tree lhs, rhs1, rhs2; + enum tree_code rhs_code = gimple_assign_rhs_code (stmt); + + /* If this is not a gimple binary expression, there is + nothing for us to do with it. */ + if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) + continue; + + /* If this was part of an already processed statement, + we don't need to touch it again. */ + if (gimple_visited_p (stmt)) + { + /* This statement might have become dead because of previous + reassociations. */ + if (has_zero_uses (gimple_get_lhs (stmt))) + { + gsi_remove (&gsi, true); + release_defs (stmt); + /* We might end up removing the last stmt above which + places the iterator to the end of the sequence. + Reset it to the last stmt in this case which might + be the end of the sequence as well if we removed + the last statement of the sequence. In which case + we need to bail out. */ + if (gsi_end_p (gsi)) + { + gsi = gsi_last_bb (bb); + if (gsi_end_p (gsi)) + break; + } + } + continue; + } + + lhs = gimple_assign_lhs (stmt); + rhs1 = gimple_assign_rhs1 (stmt); + rhs2 = gimple_assign_rhs2 (stmt); + + /* For non-bit or min/max operations we can't associate + all types. Verify that here. */ + if (rhs_code != BIT_IOR_EXPR + && rhs_code != BIT_AND_EXPR + && rhs_code != BIT_XOR_EXPR + && rhs_code != MIN_EXPR + && rhs_code != MAX_EXPR + && (!can_reassociate_p (lhs) + || !can_reassociate_p (rhs1) + || !can_reassociate_p (rhs2))) + continue; + + if (associative_tree_code (rhs_code)) + { + auto_vec<operand_entry_t> ops; + tree powi_result = NULL_TREE; + + /* There may be no immediate uses left by the time we + get here because we may have eliminated them all. */ + if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs)) + continue; + + gimple_set_visited (stmt, true); + linearize_expr_tree (&ops, stmt, true, true); + ops.qsort (sort_by_operand_rank); + optimize_ops_list (rhs_code, &ops); + if (undistribute_ops_list (rhs_code, &ops, + loop_containing_stmt (stmt))) + { + ops.qsort (sort_by_operand_rank); + optimize_ops_list (rhs_code, &ops); + } + + if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) + optimize_range_tests (rhs_code, &ops); + + if (first_pass_instance + && rhs_code == MULT_EXPR + && flag_unsafe_math_optimizations) + powi_result = attempt_builtin_powi (stmt, &ops); + + /* If the operand vector is now empty, all operands were + consumed by the __builtin_powi optimization. 
*/ + if (ops.length () == 0) + transform_stmt_to_copy (&gsi, stmt, powi_result); + else if (ops.length () == 1) + { + tree last_op = ops.last ()->op; + + if (powi_result) + transform_stmt_to_multiply (&gsi, stmt, last_op, + powi_result); + else + transform_stmt_to_copy (&gsi, stmt, last_op); + } + else + { + enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); + int ops_num = ops.length (); + int width = get_reassociation_width (ops_num, rhs_code, mode); + tree new_lhs = lhs; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Width = %d was chosen for reassociation\n", width); + + if (width > 1 + && ops.length () > 3) + rewrite_expr_tree_parallel (stmt, width, ops); + else + { + /* When there are three operands left, we want + to make sure the ones that get the double + binary op are chosen wisely. */ + int len = ops.length (); + if (len >= 3) + swap_ops_for_binary_stmt (ops, len - 3, stmt); + + new_lhs = rewrite_expr_tree (stmt, 0, ops, + powi_result != NULL); + } + + /* If we combined some repeated factors into a + __builtin_powi call, multiply that result by the + reassociated operands. */ + if (powi_result) + { + gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs); + tree type = TREE_TYPE (lhs); + tree target_ssa = make_temp_ssa_name (type, NULL, + "reassocpow"); + gimple_set_lhs (lhs_stmt, target_ssa); + update_stmt (lhs_stmt); + if (lhs != new_lhs) + target_ssa = new_lhs; + mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs, + powi_result, + target_ssa); + gimple_set_location (mul_stmt, gimple_location (stmt)); + gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT); + } + } + } + } + } + for (son = first_dom_son (CDI_POST_DOMINATORS, bb); + son; + son = next_dom_son (CDI_POST_DOMINATORS, son)) + reassociate_bb (son); +} + +void dump_ops_vector (FILE *file, vec<operand_entry_t> ops); +void debug_ops_vector (vec<operand_entry_t> ops); + +/* Dump the operand entry vector OPS to FILE. */ + +void +dump_ops_vector (FILE *file, vec<operand_entry_t> ops) +{ + operand_entry_t oe; + unsigned int i; + + FOR_EACH_VEC_ELT (ops, i, oe) + { + fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank); + print_generic_expr (file, oe->op, 0); + } +} + +/* Dump the operand entry vector OPS to STDERR. */ + +DEBUG_FUNCTION void +debug_ops_vector (vec<operand_entry_t> ops) +{ + dump_ops_vector (stderr, ops); +} + +static void +do_reassoc (void) +{ + break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)); +} + +/* Initialize the reassociation pass. */ + +static void +init_reassoc (void) +{ + int i; + long rank = 2; + int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); + + /* Find the loops, so that we can prevent moving calculations in + them. */ + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); + + memset (&reassociate_stats, 0, sizeof (reassociate_stats)); + + operand_entry_pool = create_alloc_pool ("operand entry pool", + sizeof (struct operand_entry), 30); + next_operand_entry_id = 0; + + /* Reverse RPO (Reverse Post Order) will give us something where + deeper loops come later. */ + pre_and_rev_post_order_compute (NULL, bbs, false); + bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun)); + operand_rank = pointer_map_create (); + + /* Give each default definition a distinct rank. This includes + parameters and the static chain. Walk backwards over all + SSA names so that we get proper rank ordering according + to tree_swap_operands_p. 
*/ + for (i = num_ssa_names - 1; i > 0; --i) + { + tree name = ssa_name (i); + if (name && SSA_NAME_IS_DEFAULT_DEF (name)) + insert_operand_rank (name, ++rank); + } + + /* Set up rank for each BB */ + for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++) + bb_rank[bbs[i]] = ++rank << 16; + + free (bbs); + calculate_dominance_info (CDI_POST_DOMINATORS); + plus_negates = vNULL; +} + +/* Cleanup after the reassociation pass, and print stats if + requested. */ + +static void +fini_reassoc (void) +{ + statistics_counter_event (cfun, "Linearized", + reassociate_stats.linearized); + statistics_counter_event (cfun, "Constants eliminated", + reassociate_stats.constants_eliminated); + statistics_counter_event (cfun, "Ops eliminated", + reassociate_stats.ops_eliminated); + statistics_counter_event (cfun, "Statements rewritten", + reassociate_stats.rewritten); + statistics_counter_event (cfun, "Built-in pow[i] calls encountered", + reassociate_stats.pows_encountered); + statistics_counter_event (cfun, "Built-in powi calls created", + reassociate_stats.pows_created); + + pointer_map_destroy (operand_rank); + free_alloc_pool (operand_entry_pool); + free (bb_rank); + plus_negates.release (); + free_dominance_info (CDI_POST_DOMINATORS); + loop_optimizer_finalize (); +} + +/* Gate and execute functions for Reassociation. */ + +static unsigned int +execute_reassoc (void) +{ + init_reassoc (); + + do_reassoc (); + repropagate_negates (); + + fini_reassoc (); + return 0; +} + +static bool +gate_tree_ssa_reassoc (void) +{ + return flag_tree_reassoc != 0; +} + +namespace { + +const pass_data pass_data_reassoc = +{ + GIMPLE_PASS, /* type */ + "reassoc", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_TREE_REASSOC, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_verify_ssa + | TODO_update_ssa_only_virtuals + | TODO_verify_flow ), /* todo_flags_finish */ +}; + +class pass_reassoc : public gimple_opt_pass +{ +public: + pass_reassoc (gcc::context *ctxt) + : gimple_opt_pass (pass_data_reassoc, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new pass_reassoc (m_ctxt); } + bool gate () { return gate_tree_ssa_reassoc (); } + unsigned int execute () { return execute_reassoc (); } + +}; // class pass_reassoc + +} // anon namespace + +gimple_opt_pass * +make_pass_reassoc (gcc::context *ctxt) +{ + return new pass_reassoc (ctxt); +} |
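
As a rough, source-level sketch of the repeated-factor transformation that attempt_builtin_powi performs (see the x * x * y * y * y * z * z * z * z example in the comment above): the snippet below is illustrative only and is not part of the checked-in file. The variable names are invented; the pass actually rewrites GIMPLE SSA statements using "reassocpow" temporaries, it only runs on the first pass instance when -funsafe-math-optimizations is enabled, and the __builtin_powi call is expanded into multiplies by a later pass.

/* Illustration only -- not part of tree-ssa-reassoc.c.  */
double
before (double x, double y, double z)
{
  return x * x * y * y * y * z * z * z * z;
}

/* Roughly what the product above becomes after reassociation:
   five multiplies once __builtin_powi (t2, 2) is expanded.  */
double
after (double x, double y, double z)
{
  double t1 = y * z;                   /* cached partial product y*z */
  double t2 = t1 * x;                  /* x * y * z */
  double t3 = __builtin_powi (t2, 2);  /* (x*y*z)^2 */
  double t4 = t3 * t1;                 /* reuse the cached y*z */
  return t4 * z;                       /* x^2 * y^3 * z^4 */
}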