diff options
Diffstat (limited to 'gcc-4.8.1/gcc/tree-ssa-reassoc.c')
-rw-r--r-- | gcc-4.8.1/gcc/tree-ssa-reassoc.c | 4300 |
1 files changed, 0 insertions, 4300 deletions
diff --git a/gcc-4.8.1/gcc/tree-ssa-reassoc.c b/gcc-4.8.1/gcc/tree-ssa-reassoc.c deleted file mode 100644 index 46994389e..000000000 --- a/gcc-4.8.1/gcc/tree-ssa-reassoc.c +++ /dev/null @@ -1,4300 +0,0 @@ -/* Reassociation for trees. - Copyright (C) 2005-2013 Free Software Foundation, Inc. - Contributed by Daniel Berlin <dan@dberlin.org> - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. */ - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "tm.h" -#include "tree.h" -#include "basic-block.h" -#include "gimple-pretty-print.h" -#include "tree-inline.h" -#include "tree-flow.h" -#include "gimple.h" -#include "tree-iterator.h" -#include "tree-pass.h" -#include "alloc-pool.h" -#include "vec.h" -#include "langhooks.h" -#include "pointer-set.h" -#include "cfgloop.h" -#include "flags.h" -#include "target.h" -#include "params.h" -#include "diagnostic-core.h" - -/* This is a simple global reassociation pass. It is, in part, based - on the LLVM pass of the same name (They do some things more/less - than we do, in different orders, etc). - - It consists of five steps: - - 1. Breaking up subtract operations into addition + negate, where - it would promote the reassociation of adds. - - 2. Left linearization of the expression trees, so that (A+B)+(C+D) - becomes (((A+B)+C)+D), which is easier for us to rewrite later. - During linearization, we place the operands of the binary - expressions into a vector of operand_entry_t - - 3. Optimization of the operand lists, eliminating things like a + - -a, a & a, etc. - - 3a. Combine repeated factors with the same occurrence counts - into a __builtin_powi call that will later be optimized into - an optimal number of multiplies. - - 4. Rewrite the expression trees we linearized and optimized so - they are in proper rank order. - - 5. Repropagate negates, as nothing else will clean it up ATM. - - A bit of theory on #4, since nobody seems to write anything down - about why it makes sense to do it the way they do it: - - We could do this much nicer theoretically, but don't (for reasons - explained after how to do it theoretically nice :P). - - In order to promote the most redundancy elimination, you want - binary expressions whose operands are the same rank (or - preferably, the same value) exposed to the redundancy eliminator, - for possible elimination. - - So the way to do this if we really cared, is to build the new op - tree from the leaves to the roots, merging as you go, and putting the - new op on the end of the worklist, until you are left with one - thing on the worklist. - - IE if you have to rewrite the following set of operands (listed with - rank in parentheses), with opcode PLUS_EXPR: - - a (1), b (1), c (1), d (2), e (2) - - - We start with our merge worklist empty, and the ops list with all of - those on it. - - You want to first merge all leaves of the same rank, as much as - possible. - - So first build a binary op of - - mergetmp = a + b, and put "mergetmp" on the merge worklist. - - Because there is no three operand form of PLUS_EXPR, c is not going to - be exposed to redundancy elimination as a rank 1 operand. - - So you might as well throw it on the merge worklist (you could also - consider it to now be a rank two operand, and merge it with d and e, - but in this case, you then have evicted e from a binary op. So at - least in this situation, you can't win.) - - Then build a binary op of d + e - mergetmp2 = d + e - - and put mergetmp2 on the merge worklist. - - so merge worklist = {mergetmp, c, mergetmp2} - - Continue building binary ops of these operations until you have only - one operation left on the worklist. - - So we have - - build binary op - mergetmp3 = mergetmp + c - - worklist = {mergetmp2, mergetmp3} - - mergetmp4 = mergetmp2 + mergetmp3 - - worklist = {mergetmp4} - - because we have one operation left, we can now just set the original - statement equal to the result of that operation. - - This will at least expose a + b and d + e to redundancy elimination - as binary operations. - - For extra points, you can reuse the old statements to build the - mergetmps, since you shouldn't run out. - - So why don't we do this? - - Because it's expensive, and rarely will help. Most trees we are - reassociating have 3 or less ops. If they have 2 ops, they already - will be written into a nice single binary op. If you have 3 ops, a - single simple check suffices to tell you whether the first two are of the - same rank. If so, you know to order it - - mergetmp = op1 + op2 - newstmt = mergetmp + op3 - - instead of - mergetmp = op2 + op3 - newstmt = mergetmp + op1 - - If all three are of the same rank, you can't expose them all in a - single binary operator anyway, so the above is *still* the best you - can do. - - Thus, this is what we do. When we have three ops left, we check to see - what order to put them in, and call it a day. As a nod to vector sum - reduction, we check if any of the ops are really a phi node that is a - destructive update for the associating op, and keep the destructive - update together for vector sum reduction recognition. */ - - -/* Statistics */ -static struct -{ - int linearized; - int constants_eliminated; - int ops_eliminated; - int rewritten; - int pows_encountered; - int pows_created; -} reassociate_stats; - -/* Operator, rank pair. */ -typedef struct operand_entry -{ - unsigned int rank; - int id; - tree op; - unsigned int count; -} *operand_entry_t; - -static alloc_pool operand_entry_pool; - -/* This is used to assign a unique ID to each struct operand_entry - so that qsort results are identical on different hosts. */ -static int next_operand_entry_id; - -/* Starting rank number for a given basic block, so that we can rank - operations using unmovable instructions in that BB based on the bb - depth. */ -static long *bb_rank; - -/* Operand->rank hashtable. */ -static struct pointer_map_t *operand_rank; - -/* Forward decls. */ -static long get_rank (tree); - - -/* Bias amount for loop-carried phis. We want this to be larger than - the depth of any reassociation tree we can see, but not larger than - the rank difference between two blocks. */ -#define PHI_LOOP_BIAS (1 << 15) - -/* Rank assigned to a phi statement. If STMT is a loop-carried phi of - an innermost loop, and the phi has only a single use which is inside - the loop, then the rank is the block rank of the loop latch plus an - extra bias for the loop-carried dependence. This causes expressions - calculated into an accumulator variable to be independent for each - iteration of the loop. If STMT is some other phi, the rank is the - block rank of its containing block. */ -static long -phi_rank (gimple stmt) -{ - basic_block bb = gimple_bb (stmt); - struct loop *father = bb->loop_father; - tree res; - unsigned i; - use_operand_p use; - gimple use_stmt; - - /* We only care about real loops (those with a latch). */ - if (!father->latch) - return bb_rank[bb->index]; - - /* Interesting phis must be in headers of innermost loops. */ - if (bb != father->header - || father->inner) - return bb_rank[bb->index]; - - /* Ignore virtual SSA_NAMEs. */ - res = gimple_phi_result (stmt); - if (virtual_operand_p (res)) - return bb_rank[bb->index]; - - /* The phi definition must have a single use, and that use must be - within the loop. Otherwise this isn't an accumulator pattern. */ - if (!single_imm_use (res, &use, &use_stmt) - || gimple_bb (use_stmt)->loop_father != father) - return bb_rank[bb->index]; - - /* Look for phi arguments from within the loop. If found, bias this phi. */ - for (i = 0; i < gimple_phi_num_args (stmt); i++) - { - tree arg = gimple_phi_arg_def (stmt, i); - if (TREE_CODE (arg) == SSA_NAME - && !SSA_NAME_IS_DEFAULT_DEF (arg)) - { - gimple def_stmt = SSA_NAME_DEF_STMT (arg); - if (gimple_bb (def_stmt)->loop_father == father) - return bb_rank[father->latch->index] + PHI_LOOP_BIAS; - } - } - - /* Must be an uninteresting phi. */ - return bb_rank[bb->index]; -} - -/* If EXP is an SSA_NAME defined by a PHI statement that represents a - loop-carried dependence of an innermost loop, return TRUE; else - return FALSE. */ -static bool -loop_carried_phi (tree exp) -{ - gimple phi_stmt; - long block_rank; - - if (TREE_CODE (exp) != SSA_NAME - || SSA_NAME_IS_DEFAULT_DEF (exp)) - return false; - - phi_stmt = SSA_NAME_DEF_STMT (exp); - - if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI) - return false; - - /* Non-loop-carried phis have block rank. Loop-carried phis have - an additional bias added in. If this phi doesn't have block rank, - it's biased and should not be propagated. */ - block_rank = bb_rank[gimple_bb (phi_stmt)->index]; - - if (phi_rank (phi_stmt) != block_rank) - return true; - - return false; -} - -/* Return the maximum of RANK and the rank that should be propagated - from expression OP. For most operands, this is just the rank of OP. - For loop-carried phis, the value is zero to avoid undoing the bias - in favor of the phi. */ -static long -propagate_rank (long rank, tree op) -{ - long op_rank; - - if (loop_carried_phi (op)) - return rank; - - op_rank = get_rank (op); - - return MAX (rank, op_rank); -} - -/* Look up the operand rank structure for expression E. */ - -static inline long -find_operand_rank (tree e) -{ - void **slot = pointer_map_contains (operand_rank, e); - return slot ? (long) (intptr_t) *slot : -1; -} - -/* Insert {E,RANK} into the operand rank hashtable. */ - -static inline void -insert_operand_rank (tree e, long rank) -{ - void **slot; - gcc_assert (rank > 0); - slot = pointer_map_insert (operand_rank, e); - gcc_assert (!*slot); - *slot = (void *) (intptr_t) rank; -} - -/* Given an expression E, return the rank of the expression. */ - -static long -get_rank (tree e) -{ - /* Constants have rank 0. */ - if (is_gimple_min_invariant (e)) - return 0; - - /* SSA_NAME's have the rank of the expression they are the result - of. - For globals and uninitialized values, the rank is 0. - For function arguments, use the pre-setup rank. - For PHI nodes, stores, asm statements, etc, we use the rank of - the BB. - For simple operations, the rank is the maximum rank of any of - its operands, or the bb_rank, whichever is less. - I make no claims that this is optimal, however, it gives good - results. */ - - /* We make an exception to the normal ranking system to break - dependences of accumulator variables in loops. Suppose we - have a simple one-block loop containing: - - x_1 = phi(x_0, x_2) - b = a + x_1 - c = b + d - x_2 = c + e - - As shown, each iteration of the calculation into x is fully - dependent upon the iteration before it. We would prefer to - see this in the form: - - x_1 = phi(x_0, x_2) - b = a + d - c = b + e - x_2 = c + x_1 - - If the loop is unrolled, the calculations of b and c from - different iterations can be interleaved. - - To obtain this result during reassociation, we bias the rank - of the phi definition x_1 upward, when it is recognized as an - accumulator pattern. The artificial rank causes it to be - added last, providing the desired independence. */ - - if (TREE_CODE (e) == SSA_NAME) - { - gimple stmt; - long rank; - int i, n; - tree op; - - if (SSA_NAME_IS_DEFAULT_DEF (e)) - return find_operand_rank (e); - - stmt = SSA_NAME_DEF_STMT (e); - if (gimple_code (stmt) == GIMPLE_PHI) - return phi_rank (stmt); - - if (!is_gimple_assign (stmt) - || gimple_vdef (stmt)) - return bb_rank[gimple_bb (stmt)->index]; - - /* If we already have a rank for this expression, use that. */ - rank = find_operand_rank (e); - if (rank != -1) - return rank; - - /* Otherwise, find the maximum rank for the operands. As an - exception, remove the bias from loop-carried phis when propagating - the rank so that dependent operations are not also biased. */ - rank = 0; - if (gimple_assign_single_p (stmt)) - { - tree rhs = gimple_assign_rhs1 (stmt); - n = TREE_OPERAND_LENGTH (rhs); - if (n == 0) - rank = propagate_rank (rank, rhs); - else - { - for (i = 0; i < n; i++) - { - op = TREE_OPERAND (rhs, i); - - if (op != NULL_TREE) - rank = propagate_rank (rank, op); - } - } - } - else - { - n = gimple_num_ops (stmt); - for (i = 1; i < n; i++) - { - op = gimple_op (stmt, i); - gcc_assert (op); - rank = propagate_rank (rank, op); - } - } - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Rank for "); - print_generic_expr (dump_file, e, 0); - fprintf (dump_file, " is %ld\n", (rank + 1)); - } - - /* Note the rank in the hashtable so we don't recompute it. */ - insert_operand_rank (e, (rank + 1)); - return (rank + 1); - } - - /* Globals, etc, are rank 0 */ - return 0; -} - - -/* We want integer ones to end up last no matter what, since they are - the ones we can do the most with. */ -#define INTEGER_CONST_TYPE 1 << 3 -#define FLOAT_CONST_TYPE 1 << 2 -#define OTHER_CONST_TYPE 1 << 1 - -/* Classify an invariant tree into integer, float, or other, so that - we can sort them to be near other constants of the same type. */ -static inline int -constant_type (tree t) -{ - if (INTEGRAL_TYPE_P (TREE_TYPE (t))) - return INTEGER_CONST_TYPE; - else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t))) - return FLOAT_CONST_TYPE; - else - return OTHER_CONST_TYPE; -} - -/* qsort comparison function to sort operand entries PA and PB by rank - so that the sorted array is ordered by rank in decreasing order. */ -static int -sort_by_operand_rank (const void *pa, const void *pb) -{ - const operand_entry_t oea = *(const operand_entry_t *)pa; - const operand_entry_t oeb = *(const operand_entry_t *)pb; - - /* It's nicer for optimize_expression if constants that are likely - to fold when added/multiplied//whatever are put next to each - other. Since all constants have rank 0, order them by type. */ - if (oeb->rank == 0 && oea->rank == 0) - { - if (constant_type (oeb->op) != constant_type (oea->op)) - return constant_type (oeb->op) - constant_type (oea->op); - else - /* To make sorting result stable, we use unique IDs to determine - order. */ - return oeb->id - oea->id; - } - - /* Lastly, make sure the versions that are the same go next to each - other. We use SSA_NAME_VERSION because it's stable. */ - if ((oeb->rank - oea->rank == 0) - && TREE_CODE (oea->op) == SSA_NAME - && TREE_CODE (oeb->op) == SSA_NAME) - { - if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op)) - return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op); - else - return oeb->id - oea->id; - } - - if (oeb->rank != oea->rank) - return oeb->rank - oea->rank; - else - return oeb->id - oea->id; -} - -/* Add an operand entry to *OPS for the tree operand OP. */ - -static void -add_to_ops_vec (vec<operand_entry_t> *ops, tree op) -{ - operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); - - oe->op = op; - oe->rank = get_rank (op); - oe->id = next_operand_entry_id++; - oe->count = 1; - ops->safe_push (oe); -} - -/* Add an operand entry to *OPS for the tree operand OP with repeat - count REPEAT. */ - -static void -add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op, - HOST_WIDE_INT repeat) -{ - operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); - - oe->op = op; - oe->rank = get_rank (op); - oe->id = next_operand_entry_id++; - oe->count = repeat; - ops->safe_push (oe); - - reassociate_stats.pows_encountered++; -} - -/* Return true if STMT is reassociable operation containing a binary - operation with tree code CODE, and is inside LOOP. */ - -static bool -is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop) -{ - basic_block bb = gimple_bb (stmt); - - if (gimple_bb (stmt) == NULL) - return false; - - if (!flow_bb_inside_loop_p (loop, bb)) - return false; - - if (is_gimple_assign (stmt) - && gimple_assign_rhs_code (stmt) == code - && has_single_use (gimple_assign_lhs (stmt))) - return true; - - return false; -} - - -/* Given NAME, if NAME is defined by a unary operation OPCODE, return the - operand of the negate operation. Otherwise, return NULL. */ - -static tree -get_unary_op (tree name, enum tree_code opcode) -{ - gimple stmt = SSA_NAME_DEF_STMT (name); - - if (!is_gimple_assign (stmt)) - return NULL_TREE; - - if (gimple_assign_rhs_code (stmt) == opcode) - return gimple_assign_rhs1 (stmt); - return NULL_TREE; -} - -/* If CURR and LAST are a pair of ops that OPCODE allows us to - eliminate through equivalences, do so, remove them from OPS, and - return true. Otherwise, return false. */ - -static bool -eliminate_duplicate_pair (enum tree_code opcode, - vec<operand_entry_t> *ops, - bool *all_done, - unsigned int i, - operand_entry_t curr, - operand_entry_t last) -{ - - /* If we have two of the same op, and the opcode is & |, min, or max, - we can eliminate one of them. - If we have two of the same op, and the opcode is ^, we can - eliminate both of them. */ - - if (last && last->op == curr->op) - { - switch (opcode) - { - case MAX_EXPR: - case MIN_EXPR: - case BIT_IOR_EXPR: - case BIT_AND_EXPR: - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Equivalence: "); - print_generic_expr (dump_file, curr->op, 0); - fprintf (dump_file, " [&|minmax] "); - print_generic_expr (dump_file, last->op, 0); - fprintf (dump_file, " -> "); - print_generic_stmt (dump_file, last->op, 0); - } - - ops->ordered_remove (i); - reassociate_stats.ops_eliminated ++; - - return true; - - case BIT_XOR_EXPR: - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Equivalence: "); - print_generic_expr (dump_file, curr->op, 0); - fprintf (dump_file, " ^ "); - print_generic_expr (dump_file, last->op, 0); - fprintf (dump_file, " -> nothing\n"); - } - - reassociate_stats.ops_eliminated += 2; - - if (ops->length () == 2) - { - ops->create (0); - add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op))); - *all_done = true; - } - else - { - ops->ordered_remove (i-1); - ops->ordered_remove (i-1); - } - - return true; - - default: - break; - } - } - return false; -} - -static vec<tree> plus_negates; - -/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not - expression, look in OPS for a corresponding positive operation to cancel - it out. If we find one, remove the other from OPS, replace - OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise, - return false. */ - -static bool -eliminate_plus_minus_pair (enum tree_code opcode, - vec<operand_entry_t> *ops, - unsigned int currindex, - operand_entry_t curr) -{ - tree negateop; - tree notop; - unsigned int i; - operand_entry_t oe; - - if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) - return false; - - negateop = get_unary_op (curr->op, NEGATE_EXPR); - notop = get_unary_op (curr->op, BIT_NOT_EXPR); - if (negateop == NULL_TREE && notop == NULL_TREE) - return false; - - /* Any non-negated version will have a rank that is one less than - the current rank. So once we hit those ranks, if we don't find - one, we can stop. */ - - for (i = currindex + 1; - ops->iterate (i, &oe) - && oe->rank >= curr->rank - 1 ; - i++) - { - if (oe->op == negateop) - { - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Equivalence: "); - print_generic_expr (dump_file, negateop, 0); - fprintf (dump_file, " + -"); - print_generic_expr (dump_file, oe->op, 0); - fprintf (dump_file, " -> 0\n"); - } - - ops->ordered_remove (i); - add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op))); - ops->ordered_remove (currindex); - reassociate_stats.ops_eliminated ++; - - return true; - } - else if (oe->op == notop) - { - tree op_type = TREE_TYPE (oe->op); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Equivalence: "); - print_generic_expr (dump_file, notop, 0); - fprintf (dump_file, " + ~"); - print_generic_expr (dump_file, oe->op, 0); - fprintf (dump_file, " -> -1\n"); - } - - ops->ordered_remove (i); - add_to_ops_vec (ops, build_int_cst_type (op_type, -1)); - ops->ordered_remove (currindex); - reassociate_stats.ops_eliminated ++; - - return true; - } - } - - /* CURR->OP is a negate expr in a plus expr: save it for later - inspection in repropagate_negates(). */ - if (negateop != NULL_TREE) - plus_negates.safe_push (curr->op); - - return false; -} - -/* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a - bitwise not expression, look in OPS for a corresponding operand to - cancel it out. If we find one, remove the other from OPS, replace - OPS[CURRINDEX] with 0, and return true. Otherwise, return - false. */ - -static bool -eliminate_not_pairs (enum tree_code opcode, - vec<operand_entry_t> *ops, - unsigned int currindex, - operand_entry_t curr) -{ - tree notop; - unsigned int i; - operand_entry_t oe; - - if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) - || TREE_CODE (curr->op) != SSA_NAME) - return false; - - notop = get_unary_op (curr->op, BIT_NOT_EXPR); - if (notop == NULL_TREE) - return false; - - /* Any non-not version will have a rank that is one less than - the current rank. So once we hit those ranks, if we don't find - one, we can stop. */ - - for (i = currindex + 1; - ops->iterate (i, &oe) - && oe->rank >= curr->rank - 1; - i++) - { - if (oe->op == notop) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Equivalence: "); - print_generic_expr (dump_file, notop, 0); - if (opcode == BIT_AND_EXPR) - fprintf (dump_file, " & ~"); - else if (opcode == BIT_IOR_EXPR) - fprintf (dump_file, " | ~"); - print_generic_expr (dump_file, oe->op, 0); - if (opcode == BIT_AND_EXPR) - fprintf (dump_file, " -> 0\n"); - else if (opcode == BIT_IOR_EXPR) - fprintf (dump_file, " -> -1\n"); - } - - if (opcode == BIT_AND_EXPR) - oe->op = build_zero_cst (TREE_TYPE (oe->op)); - else if (opcode == BIT_IOR_EXPR) - oe->op = build_low_bits_mask (TREE_TYPE (oe->op), - TYPE_PRECISION (TREE_TYPE (oe->op))); - - reassociate_stats.ops_eliminated += ops->length () - 1; - ops->truncate (0); - ops->quick_push (oe); - return true; - } - } - - return false; -} - -/* Use constant value that may be present in OPS to try to eliminate - operands. Note that this function is only really used when we've - eliminated ops for other reasons, or merged constants. Across - single statements, fold already does all of this, plus more. There - is little point in duplicating logic, so I've only included the - identities that I could ever construct testcases to trigger. */ - -static void -eliminate_using_constants (enum tree_code opcode, - vec<operand_entry_t> *ops) -{ - operand_entry_t oelast = ops->last (); - tree type = TREE_TYPE (oelast->op); - - if (oelast->rank == 0 - && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type))) - { - switch (opcode) - { - case BIT_AND_EXPR: - if (integer_zerop (oelast->op)) - { - if (ops->length () != 1) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Found & 0, removing all other ops\n"); - - reassociate_stats.ops_eliminated += ops->length () - 1; - - ops->truncate (0); - ops->quick_push (oelast); - return; - } - } - else if (integer_all_onesp (oelast->op)) - { - if (ops->length () != 1) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Found & -1, removing\n"); - ops->pop (); - reassociate_stats.ops_eliminated++; - } - } - break; - case BIT_IOR_EXPR: - if (integer_all_onesp (oelast->op)) - { - if (ops->length () != 1) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Found | -1, removing all other ops\n"); - - reassociate_stats.ops_eliminated += ops->length () - 1; - - ops->truncate (0); - ops->quick_push (oelast); - return; - } - } - else if (integer_zerop (oelast->op)) - { - if (ops->length () != 1) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Found | 0, removing\n"); - ops->pop (); - reassociate_stats.ops_eliminated++; - } - } - break; - case MULT_EXPR: - if (integer_zerop (oelast->op) - || (FLOAT_TYPE_P (type) - && !HONOR_NANS (TYPE_MODE (type)) - && !HONOR_SIGNED_ZEROS (TYPE_MODE (type)) - && real_zerop (oelast->op))) - { - if (ops->length () != 1) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Found * 0, removing all other ops\n"); - - reassociate_stats.ops_eliminated += ops->length () - 1; - ops->truncate (1); - ops->quick_push (oelast); - return; - } - } - else if (integer_onep (oelast->op) - || (FLOAT_TYPE_P (type) - && !HONOR_SNANS (TYPE_MODE (type)) - && real_onep (oelast->op))) - { - if (ops->length () != 1) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Found * 1, removing\n"); - ops->pop (); - reassociate_stats.ops_eliminated++; - return; - } - } - break; - case BIT_XOR_EXPR: - case PLUS_EXPR: - case MINUS_EXPR: - if (integer_zerop (oelast->op) - || (FLOAT_TYPE_P (type) - && (opcode == PLUS_EXPR || opcode == MINUS_EXPR) - && fold_real_zero_addition_p (type, oelast->op, - opcode == MINUS_EXPR))) - { - if (ops->length () != 1) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Found [|^+] 0, removing\n"); - ops->pop (); - reassociate_stats.ops_eliminated++; - return; - } - } - break; - default: - break; - } - } -} - - -static void linearize_expr_tree (vec<operand_entry_t> *, gimple, - bool, bool); - -/* Structure for tracking and counting operands. */ -typedef struct oecount_s { - int cnt; - int id; - enum tree_code oecode; - tree op; -} oecount; - - -/* The heap for the oecount hashtable and the sorted list of operands. */ -static vec<oecount> cvec; - -/* Hash function for oecount. */ - -static hashval_t -oecount_hash (const void *p) -{ - const oecount *c = &cvec[(size_t)p - 42]; - return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode; -} - -/* Comparison function for oecount. */ - -static int -oecount_eq (const void *p1, const void *p2) -{ - const oecount *c1 = &cvec[(size_t)p1 - 42]; - const oecount *c2 = &cvec[(size_t)p2 - 42]; - return (c1->oecode == c2->oecode - && c1->op == c2->op); -} - -/* Comparison function for qsort sorting oecount elements by count. */ - -static int -oecount_cmp (const void *p1, const void *p2) -{ - const oecount *c1 = (const oecount *)p1; - const oecount *c2 = (const oecount *)p2; - if (c1->cnt != c2->cnt) - return c1->cnt - c2->cnt; - else - /* If counts are identical, use unique IDs to stabilize qsort. */ - return c1->id - c2->id; -} - -/* Return TRUE iff STMT represents a builtin call that raises OP - to some exponent. */ - -static bool -stmt_is_power_of_op (gimple stmt, tree op) -{ - tree fndecl; - - if (!is_gimple_call (stmt)) - return false; - - fndecl = gimple_call_fndecl (stmt); - - if (!fndecl - || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) - return false; - - switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) - { - CASE_FLT_FN (BUILT_IN_POW): - CASE_FLT_FN (BUILT_IN_POWI): - return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0)); - - default: - return false; - } -} - -/* Given STMT which is a __builtin_pow* call, decrement its exponent - in place and return the result. Assumes that stmt_is_power_of_op - was previously called for STMT and returned TRUE. */ - -static HOST_WIDE_INT -decrement_power (gimple stmt) -{ - REAL_VALUE_TYPE c, cint; - HOST_WIDE_INT power; - tree arg1; - - switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) - { - CASE_FLT_FN (BUILT_IN_POW): - arg1 = gimple_call_arg (stmt, 1); - c = TREE_REAL_CST (arg1); - power = real_to_integer (&c) - 1; - real_from_integer (&cint, VOIDmode, power, 0, 0); - gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint)); - return power; - - CASE_FLT_FN (BUILT_IN_POWI): - arg1 = gimple_call_arg (stmt, 1); - power = TREE_INT_CST_LOW (arg1) - 1; - gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power)); - return power; - - default: - gcc_unreachable (); - } -} - -/* Find the single immediate use of STMT's LHS, and replace it - with OP. Remove STMT. If STMT's LHS is the same as *DEF, - replace *DEF with OP as well. */ - -static void -propagate_op_to_single_use (tree op, gimple stmt, tree *def) -{ - tree lhs; - gimple use_stmt; - use_operand_p use; - gimple_stmt_iterator gsi; - - if (is_gimple_call (stmt)) - lhs = gimple_call_lhs (stmt); - else - lhs = gimple_assign_lhs (stmt); - - gcc_assert (has_single_use (lhs)); - single_imm_use (lhs, &use, &use_stmt); - if (lhs == *def) - *def = op; - SET_USE (use, op); - if (TREE_CODE (op) != SSA_NAME) - update_stmt (use_stmt); - gsi = gsi_for_stmt (stmt); - unlink_stmt_vdef (stmt); - gsi_remove (&gsi, true); - release_defs (stmt); -} - -/* Walks the linear chain with result *DEF searching for an operation - with operand OP and code OPCODE removing that from the chain. *DEF - is updated if there is only one operand but no operation left. */ - -static void -zero_one_operation (tree *def, enum tree_code opcode, tree op) -{ - gimple stmt = SSA_NAME_DEF_STMT (*def); - - do - { - tree name; - - if (opcode == MULT_EXPR - && stmt_is_power_of_op (stmt, op)) - { - if (decrement_power (stmt) == 1) - propagate_op_to_single_use (op, stmt, def); - return; - } - - name = gimple_assign_rhs1 (stmt); - - /* If this is the operation we look for and one of the operands - is ours simply propagate the other operand into the stmts - single use. */ - if (gimple_assign_rhs_code (stmt) == opcode - && (name == op - || gimple_assign_rhs2 (stmt) == op)) - { - if (name == op) - name = gimple_assign_rhs2 (stmt); - propagate_op_to_single_use (name, stmt, def); - return; - } - - /* We might have a multiply of two __builtin_pow* calls, and - the operand might be hiding in the rightmost one. */ - if (opcode == MULT_EXPR - && gimple_assign_rhs_code (stmt) == opcode - && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME - && has_single_use (gimple_assign_rhs2 (stmt))) - { - gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); - if (stmt_is_power_of_op (stmt2, op)) - { - if (decrement_power (stmt2) == 1) - propagate_op_to_single_use (op, stmt2, def); - return; - } - } - - /* Continue walking the chain. */ - gcc_assert (name != op - && TREE_CODE (name) == SSA_NAME); - stmt = SSA_NAME_DEF_STMT (name); - } - while (1); -} - -/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for - the result. Places the statement after the definition of either - OP1 or OP2. Returns the new statement. */ - -static gimple -build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode) -{ - gimple op1def = NULL, op2def = NULL; - gimple_stmt_iterator gsi; - tree op; - gimple sum; - - /* Create the addition statement. */ - op = make_ssa_name (type, NULL); - sum = gimple_build_assign_with_ops (opcode, op, op1, op2); - - /* Find an insertion place and insert. */ - if (TREE_CODE (op1) == SSA_NAME) - op1def = SSA_NAME_DEF_STMT (op1); - if (TREE_CODE (op2) == SSA_NAME) - op2def = SSA_NAME_DEF_STMT (op2); - if ((!op1def || gimple_nop_p (op1def)) - && (!op2def || gimple_nop_p (op2def))) - { - gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR)); - gsi_insert_before (&gsi, sum, GSI_NEW_STMT); - } - else if ((!op1def || gimple_nop_p (op1def)) - || (op2def && !gimple_nop_p (op2def) - && stmt_dominates_stmt_p (op1def, op2def))) - { - if (gimple_code (op2def) == GIMPLE_PHI) - { - gsi = gsi_after_labels (gimple_bb (op2def)); - gsi_insert_before (&gsi, sum, GSI_NEW_STMT); - } - else - { - if (!stmt_ends_bb_p (op2def)) - { - gsi = gsi_for_stmt (op2def); - gsi_insert_after (&gsi, sum, GSI_NEW_STMT); - } - else - { - edge e; - edge_iterator ei; - - FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs) - if (e->flags & EDGE_FALLTHRU) - gsi_insert_on_edge_immediate (e, sum); - } - } - } - else - { - if (gimple_code (op1def) == GIMPLE_PHI) - { - gsi = gsi_after_labels (gimple_bb (op1def)); - gsi_insert_before (&gsi, sum, GSI_NEW_STMT); - } - else - { - if (!stmt_ends_bb_p (op1def)) - { - gsi = gsi_for_stmt (op1def); - gsi_insert_after (&gsi, sum, GSI_NEW_STMT); - } - else - { - edge e; - edge_iterator ei; - - FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs) - if (e->flags & EDGE_FALLTHRU) - gsi_insert_on_edge_immediate (e, sum); - } - } - } - update_stmt (sum); - - return sum; -} - -/* Perform un-distribution of divisions and multiplications. - A * X + B * X is transformed into (A + B) * X and A / X + B / X - to (A + B) / X for real X. - - The algorithm is organized as follows. - - - First we walk the addition chain *OPS looking for summands that - are defined by a multiplication or a real division. This results - in the candidates bitmap with relevant indices into *OPS. - - - Second we build the chains of multiplications or divisions for - these candidates, counting the number of occurrences of (operand, code) - pairs in all of the candidates chains. - - - Third we sort the (operand, code) pairs by number of occurrence and - process them starting with the pair with the most uses. - - * For each such pair we walk the candidates again to build a - second candidate bitmap noting all multiplication/division chains - that have at least one occurrence of (operand, code). - - * We build an alternate addition chain only covering these - candidates with one (operand, code) operation removed from their - multiplication/division chain. - - * The first candidate gets replaced by the alternate addition chain - multiplied/divided by the operand. - - * All candidate chains get disabled for further processing and - processing of (operand, code) pairs continues. - - The alternate addition chains built are re-processed by the main - reassociation algorithm which allows optimizing a * x * y + b * y * x - to (a + b ) * x * y in one invocation of the reassociation pass. */ - -static bool -undistribute_ops_list (enum tree_code opcode, - vec<operand_entry_t> *ops, struct loop *loop) -{ - unsigned int length = ops->length (); - operand_entry_t oe1; - unsigned i, j; - sbitmap candidates, candidates2; - unsigned nr_candidates, nr_candidates2; - sbitmap_iterator sbi0; - vec<operand_entry_t> *subops; - htab_t ctable; - bool changed = false; - int next_oecount_id = 0; - - if (length <= 1 - || opcode != PLUS_EXPR) - return false; - - /* Build a list of candidates to process. */ - candidates = sbitmap_alloc (length); - bitmap_clear (candidates); - nr_candidates = 0; - FOR_EACH_VEC_ELT (*ops, i, oe1) - { - enum tree_code dcode; - gimple oe1def; - - if (TREE_CODE (oe1->op) != SSA_NAME) - continue; - oe1def = SSA_NAME_DEF_STMT (oe1->op); - if (!is_gimple_assign (oe1def)) - continue; - dcode = gimple_assign_rhs_code (oe1def); - if ((dcode != MULT_EXPR - && dcode != RDIV_EXPR) - || !is_reassociable_op (oe1def, dcode, loop)) - continue; - - bitmap_set_bit (candidates, i); - nr_candidates++; - } - - if (nr_candidates < 2) - { - sbitmap_free (candidates); - return false; - } - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "searching for un-distribute opportunities "); - print_generic_expr (dump_file, - (*ops)[bitmap_first_set_bit (candidates)]->op, 0); - fprintf (dump_file, " %d\n", nr_candidates); - } - - /* Build linearized sub-operand lists and the counting table. */ - cvec.create (0); - ctable = htab_create (15, oecount_hash, oecount_eq, NULL); - /* ??? Macro arguments cannot have multi-argument template types in - them. This typedef is needed to workaround that limitation. */ - typedef vec<operand_entry_t> vec_operand_entry_t_heap; - subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ()); - EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) - { - gimple oedef; - enum tree_code oecode; - unsigned j; - - oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op); - oecode = gimple_assign_rhs_code (oedef); - linearize_expr_tree (&subops[i], oedef, - associative_tree_code (oecode), false); - - FOR_EACH_VEC_ELT (subops[i], j, oe1) - { - oecount c; - void **slot; - size_t idx; - c.oecode = oecode; - c.cnt = 1; - c.id = next_oecount_id++; - c.op = oe1->op; - cvec.safe_push (c); - idx = cvec.length () + 41; - slot = htab_find_slot (ctable, (void *)idx, INSERT); - if (!*slot) - { - *slot = (void *)idx; - } - else - { - cvec.pop (); - cvec[(size_t)*slot - 42].cnt++; - } - } - } - htab_delete (ctable); - - /* Sort the counting table. */ - cvec.qsort (oecount_cmp); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - oecount *c; - fprintf (dump_file, "Candidates:\n"); - FOR_EACH_VEC_ELT (cvec, j, c) - { - fprintf (dump_file, " %u %s: ", c->cnt, - c->oecode == MULT_EXPR - ? "*" : c->oecode == RDIV_EXPR ? "/" : "?"); - print_generic_expr (dump_file, c->op, 0); - fprintf (dump_file, "\n"); - } - } - - /* Process the (operand, code) pairs in order of most occurence. */ - candidates2 = sbitmap_alloc (length); - while (!cvec.is_empty ()) - { - oecount *c = &cvec.last (); - if (c->cnt < 2) - break; - - /* Now collect the operands in the outer chain that contain - the common operand in their inner chain. */ - bitmap_clear (candidates2); - nr_candidates2 = 0; - EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) - { - gimple oedef; - enum tree_code oecode; - unsigned j; - tree op = (*ops)[i]->op; - - /* If we undistributed in this chain already this may be - a constant. */ - if (TREE_CODE (op) != SSA_NAME) - continue; - - oedef = SSA_NAME_DEF_STMT (op); - oecode = gimple_assign_rhs_code (oedef); - if (oecode != c->oecode) - continue; - - FOR_EACH_VEC_ELT (subops[i], j, oe1) - { - if (oe1->op == c->op) - { - bitmap_set_bit (candidates2, i); - ++nr_candidates2; - break; - } - } - } - - if (nr_candidates2 >= 2) - { - operand_entry_t oe1, oe2; - gimple prod; - int first = bitmap_first_set_bit (candidates2); - - /* Build the new addition chain. */ - oe1 = (*ops)[first]; - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Building ("); - print_generic_expr (dump_file, oe1->op, 0); - } - zero_one_operation (&oe1->op, c->oecode, c->op); - EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0) - { - gimple sum; - oe2 = (*ops)[i]; - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, " + "); - print_generic_expr (dump_file, oe2->op, 0); - } - zero_one_operation (&oe2->op, c->oecode, c->op); - sum = build_and_add_sum (TREE_TYPE (oe1->op), - oe1->op, oe2->op, opcode); - oe2->op = build_zero_cst (TREE_TYPE (oe2->op)); - oe2->rank = 0; - oe1->op = gimple_get_lhs (sum); - } - - /* Apply the multiplication/division. */ - prod = build_and_add_sum (TREE_TYPE (oe1->op), - oe1->op, c->op, c->oecode); - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/"); - print_generic_expr (dump_file, c->op, 0); - fprintf (dump_file, "\n"); - } - - /* Record it in the addition chain and disable further - undistribution with this op. */ - oe1->op = gimple_assign_lhs (prod); - oe1->rank = get_rank (oe1->op); - subops[first].release (); - - changed = true; - } - - cvec.pop (); - } - - for (i = 0; i < ops->length (); ++i) - subops[i].release (); - free (subops); - cvec.release (); - sbitmap_free (candidates); - sbitmap_free (candidates2); - - return changed; -} - -/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison - expression, examine the other OPS to see if any of them are comparisons - of the same values, which we may be able to combine or eliminate. - For example, we can rewrite (a < b) | (a == b) as (a <= b). */ - -static bool -eliminate_redundant_comparison (enum tree_code opcode, - vec<operand_entry_t> *ops, - unsigned int currindex, - operand_entry_t curr) -{ - tree op1, op2; - enum tree_code lcode, rcode; - gimple def1, def2; - int i; - operand_entry_t oe; - - if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) - return false; - - /* Check that CURR is a comparison. */ - if (TREE_CODE (curr->op) != SSA_NAME) - return false; - def1 = SSA_NAME_DEF_STMT (curr->op); - if (!is_gimple_assign (def1)) - return false; - lcode = gimple_assign_rhs_code (def1); - if (TREE_CODE_CLASS (lcode) != tcc_comparison) - return false; - op1 = gimple_assign_rhs1 (def1); - op2 = gimple_assign_rhs2 (def1); - - /* Now look for a similar comparison in the remaining OPS. */ - for (i = currindex + 1; ops->iterate (i, &oe); i++) - { - tree t; - - if (TREE_CODE (oe->op) != SSA_NAME) - continue; - def2 = SSA_NAME_DEF_STMT (oe->op); - if (!is_gimple_assign (def2)) - continue; - rcode = gimple_assign_rhs_code (def2); - if (TREE_CODE_CLASS (rcode) != tcc_comparison) - continue; - - /* If we got here, we have a match. See if we can combine the - two comparisons. */ - if (opcode == BIT_IOR_EXPR) - t = maybe_fold_or_comparisons (lcode, op1, op2, - rcode, gimple_assign_rhs1 (def2), - gimple_assign_rhs2 (def2)); - else - t = maybe_fold_and_comparisons (lcode, op1, op2, - rcode, gimple_assign_rhs1 (def2), - gimple_assign_rhs2 (def2)); - if (!t) - continue; - - /* maybe_fold_and_comparisons and maybe_fold_or_comparisons - always give us a boolean_type_node value back. If the original - BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type, - we need to convert. */ - if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t))) - t = fold_convert (TREE_TYPE (curr->op), t); - - if (TREE_CODE (t) != INTEGER_CST - && !operand_equal_p (t, curr->op, 0)) - { - enum tree_code subcode; - tree newop1, newop2; - if (!COMPARISON_CLASS_P (t)) - continue; - extract_ops_from_tree (t, &subcode, &newop1, &newop2); - STRIP_USELESS_TYPE_CONVERSION (newop1); - STRIP_USELESS_TYPE_CONVERSION (newop2); - if (!is_gimple_val (newop1) || !is_gimple_val (newop2)) - continue; - } - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Equivalence: "); - print_generic_expr (dump_file, curr->op, 0); - fprintf (dump_file, " %s ", op_symbol_code (opcode)); - print_generic_expr (dump_file, oe->op, 0); - fprintf (dump_file, " -> "); - print_generic_expr (dump_file, t, 0); - fprintf (dump_file, "\n"); - } - - /* Now we can delete oe, as it has been subsumed by the new combined - expression t. */ - ops->ordered_remove (i); - reassociate_stats.ops_eliminated ++; - - /* If t is the same as curr->op, we're done. Otherwise we must - replace curr->op with t. Special case is if we got a constant - back, in which case we add it to the end instead of in place of - the current entry. */ - if (TREE_CODE (t) == INTEGER_CST) - { - ops->ordered_remove (currindex); - add_to_ops_vec (ops, t); - } - else if (!operand_equal_p (t, curr->op, 0)) - { - gimple sum; - enum tree_code subcode; - tree newop1; - tree newop2; - gcc_assert (COMPARISON_CLASS_P (t)); - extract_ops_from_tree (t, &subcode, &newop1, &newop2); - STRIP_USELESS_TYPE_CONVERSION (newop1); - STRIP_USELESS_TYPE_CONVERSION (newop2); - gcc_checking_assert (is_gimple_val (newop1) - && is_gimple_val (newop2)); - sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode); - curr->op = gimple_get_lhs (sum); - } - return true; - } - - return false; -} - -/* Perform various identities and other optimizations on the list of - operand entries, stored in OPS. The tree code for the binary - operation between all the operands is OPCODE. */ - -static void -optimize_ops_list (enum tree_code opcode, - vec<operand_entry_t> *ops) -{ - unsigned int length = ops->length (); - unsigned int i; - operand_entry_t oe; - operand_entry_t oelast = NULL; - bool iterate = false; - - if (length == 1) - return; - - oelast = ops->last (); - - /* If the last two are constants, pop the constants off, merge them - and try the next two. */ - if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) - { - operand_entry_t oelm1 = (*ops)[length - 2]; - - if (oelm1->rank == 0 - && is_gimple_min_invariant (oelm1->op) - && useless_type_conversion_p (TREE_TYPE (oelm1->op), - TREE_TYPE (oelast->op))) - { - tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), - oelm1->op, oelast->op); - - if (folded && is_gimple_min_invariant (folded)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Merging constants\n"); - - ops->pop (); - ops->pop (); - - add_to_ops_vec (ops, folded); - reassociate_stats.constants_eliminated++; - - optimize_ops_list (opcode, ops); - return; - } - } - } - - eliminate_using_constants (opcode, ops); - oelast = NULL; - - for (i = 0; ops->iterate (i, &oe);) - { - bool done = false; - - if (eliminate_not_pairs (opcode, ops, i, oe)) - return; - if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) - || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) - || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) - { - if (done) - return; - iterate = true; - oelast = NULL; - continue; - } - oelast = oe; - i++; - } - - length = ops->length (); - oelast = ops->last (); - - if (iterate) - optimize_ops_list (opcode, ops); -} - -/* The following functions are subroutines to optimize_range_tests and allow - it to try to change a logical combination of comparisons into a range - test. - - For example, both - X == 2 || X == 5 || X == 3 || X == 4 - and - X >= 2 && X <= 5 - are converted to - (unsigned) (X - 2) <= 3 - - For more information see comments above fold_test_range in fold-const.c, - this implementation is for GIMPLE. */ - -struct range_entry -{ - tree exp; - tree low; - tree high; - bool in_p; - bool strict_overflow_p; - unsigned int idx, next; -}; - -/* This is similar to make_range in fold-const.c, but on top of - GIMPLE instead of trees. If EXP is non-NULL, it should be - an SSA_NAME and STMT argument is ignored, otherwise STMT - argument should be a GIMPLE_COND. */ - -static void -init_range_entry (struct range_entry *r, tree exp, gimple stmt) -{ - int in_p; - tree low, high; - bool is_bool, strict_overflow_p; - - r->exp = NULL_TREE; - r->in_p = false; - r->strict_overflow_p = false; - r->low = NULL_TREE; - r->high = NULL_TREE; - if (exp != NULL_TREE - && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) - return; - - /* Start with simply saying "EXP != 0" and then look at the code of EXP - and see if we can refine the range. Some of the cases below may not - happen, but it doesn't seem worth worrying about this. We "continue" - the outer loop when we've changed something; otherwise we "break" - the switch, which will "break" the while. */ - low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; - high = low; - in_p = 0; - strict_overflow_p = false; - is_bool = false; - if (exp == NULL_TREE) - is_bool = true; - else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) - { - if (TYPE_UNSIGNED (TREE_TYPE (exp))) - is_bool = true; - else - return; - } - else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) - is_bool = true; - - while (1) - { - enum tree_code code; - tree arg0, arg1, exp_type; - tree nexp; - location_t loc; - - if (exp != NULL_TREE) - { - if (TREE_CODE (exp) != SSA_NAME) - break; - - stmt = SSA_NAME_DEF_STMT (exp); - if (!is_gimple_assign (stmt)) - break; - - code = gimple_assign_rhs_code (stmt); - arg0 = gimple_assign_rhs1 (stmt); - arg1 = gimple_assign_rhs2 (stmt); - exp_type = TREE_TYPE (exp); - } - else - { - code = gimple_cond_code (stmt); - arg0 = gimple_cond_lhs (stmt); - arg1 = gimple_cond_rhs (stmt); - exp_type = boolean_type_node; - } - - if (TREE_CODE (arg0) != SSA_NAME) - break; - loc = gimple_location (stmt); - switch (code) - { - case BIT_NOT_EXPR: - if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) - { - in_p = !in_p; - exp = arg0; - continue; - } - break; - case SSA_NAME: - exp = arg0; - continue; - CASE_CONVERT: - if (is_bool) - goto do_default; - if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) - { - if (TYPE_UNSIGNED (TREE_TYPE (arg0))) - is_bool = true; - else - return; - } - else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) - is_bool = true; - goto do_default; - case EQ_EXPR: - case NE_EXPR: - case LT_EXPR: - case LE_EXPR: - case GE_EXPR: - case GT_EXPR: - is_bool = true; - /* FALLTHRU */ - default: - if (!is_bool) - return; - do_default: - nexp = make_range_step (loc, code, arg0, arg1, exp_type, - &low, &high, &in_p, - &strict_overflow_p); - if (nexp != NULL_TREE) - { - exp = nexp; - gcc_assert (TREE_CODE (exp) == SSA_NAME); - continue; - } - break; - } - break; - } - if (is_bool) - { - r->exp = exp; - r->in_p = in_p; - r->low = low; - r->high = high; - r->strict_overflow_p = strict_overflow_p; - } -} - -/* Comparison function for qsort. Sort entries - without SSA_NAME exp first, then with SSA_NAMEs sorted - by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs - by increasing ->low and if ->low is the same, by increasing - ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE - maximum. */ - -static int -range_entry_cmp (const void *a, const void *b) -{ - const struct range_entry *p = (const struct range_entry *) a; - const struct range_entry *q = (const struct range_entry *) b; - - if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) - { - if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) - { - /* Group range_entries for the same SSA_NAME together. */ - if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) - return -1; - else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) - return 1; - /* If ->low is different, NULL low goes first, then by - ascending low. */ - if (p->low != NULL_TREE) - { - if (q->low != NULL_TREE) - { - tree tem = fold_binary (LT_EXPR, boolean_type_node, - p->low, q->low); - if (tem && integer_onep (tem)) - return -1; - tem = fold_binary (GT_EXPR, boolean_type_node, - p->low, q->low); - if (tem && integer_onep (tem)) - return 1; - } - else - return 1; - } - else if (q->low != NULL_TREE) - return -1; - /* If ->high is different, NULL high goes last, before that by - ascending high. */ - if (p->high != NULL_TREE) - { - if (q->high != NULL_TREE) - { - tree tem = fold_binary (LT_EXPR, boolean_type_node, - p->high, q->high); - if (tem && integer_onep (tem)) - return -1; - tem = fold_binary (GT_EXPR, boolean_type_node, - p->high, q->high); - if (tem && integer_onep (tem)) - return 1; - } - else - return -1; - } - else if (p->high != NULL_TREE) - return 1; - /* If both ranges are the same, sort below by ascending idx. */ - } - else - return 1; - } - else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) - return -1; - - if (p->idx < q->idx) - return -1; - else - { - gcc_checking_assert (p->idx > q->idx); - return 1; - } -} - -/* Helper routine of optimize_range_test. - [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for - RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, - OPCODE and OPS are arguments of optimize_range_tests. Return - true if the range merge has been successful. - If OPCODE is ERROR_MARK, this is called from within - maybe_optimize_range_tests and is performing inter-bb range optimization. - Changes should be then performed right away, and whether an op is - BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */ - -static bool -update_range_test (struct range_entry *range, struct range_entry *otherrange, - unsigned int count, enum tree_code opcode, - vec<operand_entry_t> *ops, tree exp, bool in_p, - tree low, tree high, bool strict_overflow_p) -{ - operand_entry_t oe = (*ops)[range->idx]; - tree op = oe->op; - gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id)); - location_t loc = gimple_location (stmt); - tree optype = op ? TREE_TYPE (op) : boolean_type_node; - tree tem = build_range_check (loc, optype, exp, in_p, low, high); - enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; - gimple_stmt_iterator gsi; - - if (tem == NULL_TREE) - return false; - - if (strict_overflow_p && issue_strict_overflow_warning (wc)) - warning_at (loc, OPT_Wstrict_overflow, - "assuming signed overflow does not occur " - "when simplifying range test"); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - struct range_entry *r; - fprintf (dump_file, "Optimizing range tests "); - print_generic_expr (dump_file, range->exp, 0); - fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); - print_generic_expr (dump_file, range->low, 0); - fprintf (dump_file, ", "); - print_generic_expr (dump_file, range->high, 0); - fprintf (dump_file, "]"); - for (r = otherrange; r < otherrange + count; r++) - { - fprintf (dump_file, " and %c[", r->in_p ? '+' : '-'); - print_generic_expr (dump_file, r->low, 0); - fprintf (dump_file, ", "); - print_generic_expr (dump_file, r->high, 0); - fprintf (dump_file, "]"); - } - fprintf (dump_file, "\n into "); - print_generic_expr (dump_file, tem, 0); - fprintf (dump_file, "\n"); - } - - if (opcode == BIT_IOR_EXPR - || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) - tem = invert_truthvalue_loc (loc, tem); - - tem = fold_convert_loc (loc, optype, tem); - gsi = gsi_for_stmt (stmt); - tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, - GSI_SAME_STMT); - - /* If doing inter-bb range test optimization, update the - stmts immediately. Start with changing the first range test - immediate use to the new value (TEM), or, if the first range - test is a GIMPLE_COND stmt, change that condition. */ - if (opcode == ERROR_MARK) - { - if (op) - { - imm_use_iterator iter; - use_operand_p use_p; - gimple use_stmt; - - FOR_EACH_IMM_USE_STMT (use_stmt, iter, op) - { - if (is_gimple_debug (use_stmt)) - continue; - FOR_EACH_IMM_USE_ON_STMT (use_p, iter) - SET_USE (use_p, tem); - update_stmt (use_stmt); - } - } - else - { - gimple_cond_set_code (stmt, NE_EXPR); - gimple_cond_set_lhs (stmt, tem); - gimple_cond_set_rhs (stmt, boolean_false_node); - update_stmt (stmt); - } - } - oe->op = tem; - range->exp = exp; - range->low = low; - range->high = high; - range->in_p = in_p; - range->strict_overflow_p = false; - - for (range = otherrange; range < otherrange + count; range++) - { - oe = (*ops)[range->idx]; - /* Now change all the other range test immediate uses, so that - those tests will be optimized away. */ - if (opcode == ERROR_MARK) - { - if (oe->op) - { - imm_use_iterator iter; - use_operand_p use_p; - gimple use_stmt; - - FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op) - { - if (is_gimple_debug (use_stmt)) - continue; - /* If imm use of _8 is a statement like _7 = _8 | _9;, - adjust it into _7 = _9;. */ - if (is_gimple_assign (use_stmt) - && gimple_assign_rhs_code (use_stmt) == oe->rank) - { - tree expr = NULL_TREE; - if (oe->op == gimple_assign_rhs1 (use_stmt)) - expr = gimple_assign_rhs2 (use_stmt); - else if (oe->op == gimple_assign_rhs2 (use_stmt)) - expr = gimple_assign_rhs1 (use_stmt); - if (expr - && expr != oe->op - && TREE_CODE (expr) == SSA_NAME) - { - gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt); - gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME, - expr, NULL_TREE); - update_stmt (use_stmt); - continue; - } - } - /* If imm use of _8 is a statement like _7 = (int) _8;, - adjust it into _7 = 0; or _7 = 1;. */ - if (gimple_assign_cast_p (use_stmt) - && oe->op == gimple_assign_rhs1 (use_stmt)) - { - tree lhs = gimple_assign_lhs (use_stmt); - if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))) - { - gimple_stmt_iterator gsi2 - = gsi_for_stmt (use_stmt); - tree expr = build_int_cst (TREE_TYPE (lhs), - oe->rank == BIT_IOR_EXPR - ? 0 : 1); - gimple_assign_set_rhs_with_ops (&gsi2, - INTEGER_CST, - expr, NULL_TREE); - update_stmt (use_stmt); - continue; - } - } - /* Otherwise replace the use with 0 or 1. */ - FOR_EACH_IMM_USE_ON_STMT (use_p, iter) - SET_USE (use_p, - build_int_cst (TREE_TYPE (oe->op), - oe->rank == BIT_IOR_EXPR - ? 0 : 1)); - update_stmt (use_stmt); - } - } - else - { - /* If range test was a GIMPLE_COND, simply change it - into an always false or always true condition. */ - stmt = last_stmt (BASIC_BLOCK (oe->id)); - if (oe->rank == BIT_IOR_EXPR) - gimple_cond_make_false (stmt); - else - gimple_cond_make_true (stmt); - update_stmt (stmt); - } - } - oe->op = error_mark_node; - range->exp = NULL_TREE; - } - return true; -} - -/* Optimize range tests, similarly how fold_range_test optimizes - it on trees. The tree code for the binary - operation between all the operands is OPCODE. - If OPCODE is ERROR_MARK, optimize_range_tests is called from within - maybe_optimize_range_tests for inter-bb range optimization. - In that case if oe->op is NULL, oe->id is bb->index whose - GIMPLE_COND is && or ||ed into the test, and oe->rank says - the actual opcode. */ - -static void -optimize_range_tests (enum tree_code opcode, - vec<operand_entry_t> *ops) -{ - unsigned int length = ops->length (), i, j, first; - operand_entry_t oe; - struct range_entry *ranges; - bool any_changes = false; - - if (length == 1) - return; - - ranges = XNEWVEC (struct range_entry, length); - for (i = 0; i < length; i++) - { - oe = (*ops)[i]; - ranges[i].idx = i; - init_range_entry (ranges + i, oe->op, - oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id))); - /* For | invert it now, we will invert it again before emitting - the optimized expression. */ - if (opcode == BIT_IOR_EXPR - || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) - ranges[i].in_p = !ranges[i].in_p; - } - - qsort (ranges, length, sizeof (*ranges), range_entry_cmp); - for (i = 0; i < length; i++) - if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) - break; - - /* Try to merge ranges. */ - for (first = i; i < length; i++) - { - tree low = ranges[i].low; - tree high = ranges[i].high; - int in_p = ranges[i].in_p; - bool strict_overflow_p = ranges[i].strict_overflow_p; - int update_fail_count = 0; - - for (j = i + 1; j < length; j++) - { - if (ranges[i].exp != ranges[j].exp) - break; - if (!merge_ranges (&in_p, &low, &high, in_p, low, high, - ranges[j].in_p, ranges[j].low, ranges[j].high)) - break; - strict_overflow_p |= ranges[j].strict_overflow_p; - } - - if (j == i + 1) - continue; - - if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode, - ops, ranges[i].exp, in_p, low, high, - strict_overflow_p)) - { - i = j - 1; - any_changes = true; - } - /* Avoid quadratic complexity if all merge_ranges calls would succeed, - while update_range_test would fail. */ - else if (update_fail_count == 64) - i = j - 1; - else - ++update_fail_count; - } - - /* Optimize X == CST1 || X == CST2 - if popcount (CST1 ^ CST2) == 1 into - (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). - Similarly for ranges. E.g. - X != 2 && X != 3 && X != 10 && X != 11 - will be transformed by the above loop into - (X - 2U) <= 1U && (X - 10U) <= 1U - and this loop can transform that into - ((X & ~8) - 2U) <= 1U. */ - for (i = first; i < length; i++) - { - tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp; - - if (ranges[i].exp == NULL_TREE || ranges[i].in_p) - continue; - type = TREE_TYPE (ranges[i].exp); - if (!INTEGRAL_TYPE_P (type)) - continue; - lowi = ranges[i].low; - if (lowi == NULL_TREE) - lowi = TYPE_MIN_VALUE (type); - highi = ranges[i].high; - if (highi == NULL_TREE) - continue; - for (j = i + 1; j < length && j < i + 64; j++) - { - if (ranges[j].exp == NULL_TREE) - continue; - if (ranges[i].exp != ranges[j].exp) - break; - if (ranges[j].in_p) - continue; - lowj = ranges[j].low; - if (lowj == NULL_TREE) - continue; - highj = ranges[j].high; - if (highj == NULL_TREE) - highj = TYPE_MAX_VALUE (type); - tem = fold_binary (GT_EXPR, boolean_type_node, - lowj, highi); - if (tem == NULL_TREE || !integer_onep (tem)) - continue; - lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); - if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) - continue; - gcc_checking_assert (!integer_zerop (lowxor)); - tem = fold_binary (MINUS_EXPR, type, lowxor, - build_int_cst (type, 1)); - if (tem == NULL_TREE) - continue; - tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem); - if (tem == NULL_TREE || !integer_zerop (tem)) - continue; - highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); - if (!tree_int_cst_equal (lowxor, highxor)) - continue; - tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); - exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem); - lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); - highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); - if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp, - ranges[i].in_p, lowj, highj, - ranges[i].strict_overflow_p - || ranges[j].strict_overflow_p)) - { - any_changes = true; - break; - } - } - } - - if (any_changes && opcode != ERROR_MARK) - { - j = 0; - FOR_EACH_VEC_ELT (*ops, i, oe) - { - if (oe->op == error_mark_node) - continue; - else if (i != j) - (*ops)[j] = oe; - j++; - } - ops->truncate (j); - } - - XDELETEVEC (ranges); -} - -/* Return true if STMT is a cast like: - <bb N>: - ... - _123 = (int) _234; - - <bb M>: - # _345 = PHI <_123(N), 1(...), 1(...)> - where _234 has bool type, _123 has single use and - bb N has a single successor M. This is commonly used in - the last block of a range test. */ - -static bool -final_range_test_p (gimple stmt) -{ - basic_block bb, rhs_bb; - edge e; - tree lhs, rhs; - use_operand_p use_p; - gimple use_stmt; - - if (!gimple_assign_cast_p (stmt)) - return false; - bb = gimple_bb (stmt); - if (!single_succ_p (bb)) - return false; - e = single_succ_edge (bb); - if (e->flags & EDGE_COMPLEX) - return false; - - lhs = gimple_assign_lhs (stmt); - rhs = gimple_assign_rhs1 (stmt); - if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) - || TREE_CODE (rhs) != SSA_NAME - || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE) - return false; - - /* Test whether lhs is consumed only by a PHI in the only successor bb. */ - if (!single_imm_use (lhs, &use_p, &use_stmt)) - return false; - - if (gimple_code (use_stmt) != GIMPLE_PHI - || gimple_bb (use_stmt) != e->dest) - return false; - - /* And that the rhs is defined in the same loop. */ - rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)); - if (rhs_bb == NULL - || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) - return false; - - return true; -} - -/* Return true if BB is suitable basic block for inter-bb range test - optimization. If BACKWARD is true, BB should be the only predecessor - of TEST_BB, and *OTHER_BB is either NULL and filled by the routine, - or compared with to find a common basic block to which all conditions - branch to if true resp. false. If BACKWARD is false, TEST_BB should - be the only predecessor of BB. */ - -static bool -suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, - bool backward) -{ - edge_iterator ei, ei2; - edge e, e2; - gimple stmt; - gimple_stmt_iterator gsi; - bool other_edge_seen = false; - bool is_cond; - - if (test_bb == bb) - return false; - /* Check last stmt first. */ - stmt = last_stmt (bb); - if (stmt == NULL - || (gimple_code (stmt) != GIMPLE_COND - && (backward || !final_range_test_p (stmt))) - || gimple_visited_p (stmt) - || stmt_could_throw_p (stmt) - || *other_bb == bb) - return false; - is_cond = gimple_code (stmt) == GIMPLE_COND; - if (is_cond) - { - /* If last stmt is GIMPLE_COND, verify that one of the succ edges - goes to the next bb (if BACKWARD, it is TEST_BB), and the other - to *OTHER_BB (if not set yet, try to find it out). */ - if (EDGE_COUNT (bb->succs) != 2) - return false; - FOR_EACH_EDGE (e, ei, bb->succs) - { - if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) - return false; - if (e->dest == test_bb) - { - if (backward) - continue; - else - return false; - } - if (e->dest == bb) - return false; - if (*other_bb == NULL) - { - FOR_EACH_EDGE (e2, ei2, test_bb->succs) - if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) - return false; - else if (e->dest == e2->dest) - *other_bb = e->dest; - if (*other_bb == NULL) - return false; - } - if (e->dest == *other_bb) - other_edge_seen = true; - else if (backward) - return false; - } - if (*other_bb == NULL || !other_edge_seen) - return false; - } - else if (single_succ (bb) != *other_bb) - return false; - - /* Now check all PHIs of *OTHER_BB. */ - e = find_edge (bb, *other_bb); - e2 = find_edge (test_bb, *other_bb); - for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple phi = gsi_stmt (gsi); - /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments - corresponding to BB and TEST_BB predecessor must be the same. */ - if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx), - gimple_phi_arg_def (phi, e2->dest_idx), 0)) - { - /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, - one of the PHIs should have the lhs of the last stmt in - that block as PHI arg and that PHI should have 0 or 1 - corresponding to it in all other range test basic blocks - considered. */ - if (!is_cond) - { - if (gimple_phi_arg_def (phi, e->dest_idx) - == gimple_assign_lhs (stmt) - && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx)) - || integer_onep (gimple_phi_arg_def (phi, - e2->dest_idx)))) - continue; - } - else - { - gimple test_last = last_stmt (test_bb); - if (gimple_code (test_last) != GIMPLE_COND - && gimple_phi_arg_def (phi, e2->dest_idx) - == gimple_assign_lhs (test_last) - && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx)) - || integer_onep (gimple_phi_arg_def (phi, e->dest_idx)))) - continue; - } - - return false; - } - } - return true; -} - -/* Return true if BB doesn't have side-effects that would disallow - range test optimization, all SSA_NAMEs set in the bb are consumed - in the bb and there are no PHIs. */ - -static bool -no_side_effect_bb (basic_block bb) -{ - gimple_stmt_iterator gsi; - gimple last; - - if (!gimple_seq_empty_p (phi_nodes (bb))) - return false; - last = last_stmt (bb); - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple stmt = gsi_stmt (gsi); - tree lhs; - imm_use_iterator imm_iter; - use_operand_p use_p; - - if (is_gimple_debug (stmt)) - continue; - if (gimple_has_side_effects (stmt)) - return false; - if (stmt == last) - return true; - if (!is_gimple_assign (stmt)) - return false; - lhs = gimple_assign_lhs (stmt); - if (TREE_CODE (lhs) != SSA_NAME) - return false; - if (gimple_assign_rhs_could_trap_p (stmt)) - return false; - FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) - { - gimple use_stmt = USE_STMT (use_p); - if (is_gimple_debug (use_stmt)) - continue; - if (gimple_bb (use_stmt) != bb) - return false; - } - } - return false; -} - -/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, - return true and fill in *OPS recursively. */ - -static bool -get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops, - struct loop *loop) -{ - gimple stmt = SSA_NAME_DEF_STMT (var); - tree rhs[2]; - int i; - - if (!is_reassociable_op (stmt, code, loop)) - return false; - - rhs[0] = gimple_assign_rhs1 (stmt); - rhs[1] = gimple_assign_rhs2 (stmt); - gimple_set_visited (stmt, true); - for (i = 0; i < 2; i++) - if (TREE_CODE (rhs[i]) == SSA_NAME - && !get_ops (rhs[i], code, ops, loop) - && has_single_use (rhs[i])) - { - operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); - - oe->op = rhs[i]; - oe->rank = code; - oe->id = 0; - oe->count = 1; - ops->safe_push (oe); - } - return true; -} - -/* Inter-bb range test optimization. */ - -static void -maybe_optimize_range_tests (gimple stmt) -{ - basic_block first_bb = gimple_bb (stmt); - basic_block last_bb = first_bb; - basic_block other_bb = NULL; - basic_block bb; - edge_iterator ei; - edge e; - vec<operand_entry_t> ops = vNULL; - - /* Consider only basic blocks that end with GIMPLE_COND or - a cast statement satisfying final_range_test_p. All - but the last bb in the first_bb .. last_bb range - should end with GIMPLE_COND. */ - if (gimple_code (stmt) == GIMPLE_COND) - { - if (EDGE_COUNT (first_bb->succs) != 2) - return; - } - else if (final_range_test_p (stmt)) - other_bb = single_succ (first_bb); - else - return; - - if (stmt_could_throw_p (stmt)) - return; - - /* As relative ordering of post-dominator sons isn't fixed, - maybe_optimize_range_tests can be called first on any - bb in the range we want to optimize. So, start searching - backwards, if first_bb can be set to a predecessor. */ - while (single_pred_p (first_bb)) - { - basic_block pred_bb = single_pred (first_bb); - if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true)) - break; - if (!no_side_effect_bb (first_bb)) - break; - first_bb = pred_bb; - } - /* If first_bb is last_bb, other_bb hasn't been computed yet. - Before starting forward search in last_bb successors, find - out the other_bb. */ - if (first_bb == last_bb) - { - other_bb = NULL; - /* As non-GIMPLE_COND last stmt always terminates the range, - if forward search didn't discover anything, just give up. */ - if (gimple_code (stmt) != GIMPLE_COND) - return; - /* Look at both successors. Either it ends with a GIMPLE_COND - and satisfies suitable_cond_bb, or ends with a cast and - other_bb is that cast's successor. */ - FOR_EACH_EDGE (e, ei, first_bb->succs) - if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) - || e->dest == first_bb) - return; - else if (single_pred_p (e->dest)) - { - stmt = last_stmt (e->dest); - if (stmt - && gimple_code (stmt) == GIMPLE_COND - && EDGE_COUNT (e->dest->succs) == 2) - { - if (suitable_cond_bb (first_bb, e->dest, &other_bb, true)) - break; - else - other_bb = NULL; - } - else if (stmt - && final_range_test_p (stmt) - && find_edge (first_bb, single_succ (e->dest))) - { - other_bb = single_succ (e->dest); - if (other_bb == first_bb) - other_bb = NULL; - } - } - if (other_bb == NULL) - return; - } - /* Now do the forward search, moving last_bb to successor bbs - that aren't other_bb. */ - while (EDGE_COUNT (last_bb->succs) == 2) - { - FOR_EACH_EDGE (e, ei, last_bb->succs) - if (e->dest != other_bb) - break; - if (e == NULL) - break; - if (!single_pred_p (e->dest)) - break; - if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false)) - break; - if (!no_side_effect_bb (e->dest)) - break; - last_bb = e->dest; - } - if (first_bb == last_bb) - return; - /* Here basic blocks first_bb through last_bb's predecessor - end with GIMPLE_COND, all of them have one of the edges to - other_bb and another to another block in the range, - all blocks except first_bb don't have side-effects and - last_bb ends with either GIMPLE_COND, or cast satisfying - final_range_test_p. */ - for (bb = last_bb; ; bb = single_pred (bb)) - { - enum tree_code code; - tree lhs, rhs; - - e = find_edge (bb, other_bb); - stmt = last_stmt (bb); - gimple_set_visited (stmt, true); - if (gimple_code (stmt) != GIMPLE_COND) - { - use_operand_p use_p; - gimple phi; - edge e2; - unsigned int d; - - lhs = gimple_assign_lhs (stmt); - rhs = gimple_assign_rhs1 (stmt); - gcc_assert (bb == last_bb); - - /* stmt is - _123 = (int) _234; - - followed by: - <bb M>: - # _345 = PHI <_123(N), 1(...), 1(...)> - - or 0 instead of 1. If it is 0, the _234 - range test is anded together with all the - other range tests, if it is 1, it is ored with - them. */ - single_imm_use (lhs, &use_p, &phi); - gcc_assert (gimple_code (phi) == GIMPLE_PHI); - e2 = find_edge (first_bb, other_bb); - d = e2->dest_idx; - gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); - if (integer_zerop (gimple_phi_arg_def (phi, d))) - code = BIT_AND_EXPR; - else - { - gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); - code = BIT_IOR_EXPR; - } - - /* If _234 SSA_NAME_DEF_STMT is - _234 = _567 | _789; - (or &, corresponding to 1/0 in the phi arguments, - push into ops the individual range test arguments - of the bitwise or resp. and, recursively. */ - if (!get_ops (rhs, code, &ops, - loop_containing_stmt (stmt)) - && has_single_use (rhs)) - { - /* Otherwise, push the _234 range test itself. */ - operand_entry_t oe - = (operand_entry_t) pool_alloc (operand_entry_pool); - - oe->op = rhs; - oe->rank = code; - oe->id = 0; - oe->count = 1; - ops.safe_push (oe); - } - continue; - } - /* Otherwise stmt is GIMPLE_COND. */ - code = gimple_cond_code (stmt); - lhs = gimple_cond_lhs (stmt); - rhs = gimple_cond_rhs (stmt); - if (TREE_CODE (lhs) == SSA_NAME - && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) - && ((code != EQ_EXPR && code != NE_EXPR) - || rhs != boolean_false_node - /* Either push into ops the individual bitwise - or resp. and operands, depending on which - edge is other_bb. */ - || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) - ^ (code == EQ_EXPR)) - ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, - loop_containing_stmt (stmt)))) - { - /* Or push the GIMPLE_COND stmt itself. */ - operand_entry_t oe - = (operand_entry_t) pool_alloc (operand_entry_pool); - - oe->op = NULL; - oe->rank = (e->flags & EDGE_TRUE_VALUE) - ? BIT_IOR_EXPR : BIT_AND_EXPR; - /* oe->op = NULL signs that there is no SSA_NAME - for the range test, and oe->id instead is the - basic block number, at which's end the GIMPLE_COND - is. */ - oe->id = bb->index; - oe->count = 1; - ops.safe_push (oe); - } - if (bb == first_bb) - break; - } - if (ops.length () > 1) - optimize_range_tests (ERROR_MARK, &ops); - ops.release (); -} - -/* Return true if OPERAND is defined by a PHI node which uses the LHS - of STMT in it's operands. This is also known as a "destructive - update" operation. */ - -static bool -is_phi_for_stmt (gimple stmt, tree operand) -{ - gimple def_stmt; - tree lhs; - use_operand_p arg_p; - ssa_op_iter i; - - if (TREE_CODE (operand) != SSA_NAME) - return false; - - lhs = gimple_assign_lhs (stmt); - - def_stmt = SSA_NAME_DEF_STMT (operand); - if (gimple_code (def_stmt) != GIMPLE_PHI) - return false; - - FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE) - if (lhs == USE_FROM_PTR (arg_p)) - return true; - return false; -} - -/* Remove def stmt of VAR if VAR has zero uses and recurse - on rhs1 operand if so. */ - -static void -remove_visited_stmt_chain (tree var) -{ - gimple stmt; - gimple_stmt_iterator gsi; - - while (1) - { - if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) - return; - stmt = SSA_NAME_DEF_STMT (var); - if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) - { - var = gimple_assign_rhs1 (stmt); - gsi = gsi_for_stmt (stmt); - gsi_remove (&gsi, true); - release_defs (stmt); - } - else - return; - } -} - -/* This function checks three consequtive operands in - passed operands vector OPS starting from OPINDEX and - swaps two operands if it is profitable for binary operation - consuming OPINDEX + 1 abnd OPINDEX + 2 operands. - - We pair ops with the same rank if possible. - - The alternative we try is to see if STMT is a destructive - update style statement, which is like: - b = phi (a, ...) - a = c + b; - In that case, we want to use the destructive update form to - expose the possible vectorizer sum reduction opportunity. - In that case, the third operand will be the phi node. This - check is not performed if STMT is null. - - We could, of course, try to be better as noted above, and do a - lot of work to try to find these opportunities in >3 operand - cases, but it is unlikely to be worth it. */ - -static void -swap_ops_for_binary_stmt (vec<operand_entry_t> ops, - unsigned int opindex, gimple stmt) -{ - operand_entry_t oe1, oe2, oe3; - - oe1 = ops[opindex]; - oe2 = ops[opindex + 1]; - oe3 = ops[opindex + 2]; - - if ((oe1->rank == oe2->rank - && oe2->rank != oe3->rank) - || (stmt && is_phi_for_stmt (stmt, oe3->op) - && !is_phi_for_stmt (stmt, oe1->op) - && !is_phi_for_stmt (stmt, oe2->op))) - { - struct operand_entry temp = *oe3; - oe3->op = oe1->op; - oe3->rank = oe1->rank; - oe1->op = temp.op; - oe1->rank= temp.rank; - } - else if ((oe1->rank == oe3->rank - && oe2->rank != oe3->rank) - || (stmt && is_phi_for_stmt (stmt, oe2->op) - && !is_phi_for_stmt (stmt, oe1->op) - && !is_phi_for_stmt (stmt, oe3->op))) - { - struct operand_entry temp = *oe2; - oe2->op = oe1->op; - oe2->rank = oe1->rank; - oe1->op = temp.op; - oe1->rank= temp.rank; - } -} - -/* Recursively rewrite our linearized statements so that the operators - match those in OPS[OPINDEX], putting the computation in rank - order. */ - -static void -rewrite_expr_tree (gimple stmt, unsigned int opindex, - vec<operand_entry_t> ops, bool moved) -{ - tree rhs1 = gimple_assign_rhs1 (stmt); - tree rhs2 = gimple_assign_rhs2 (stmt); - operand_entry_t oe; - - /* If we have three operands left, then we want to make sure the ones - that get the double binary op are chosen wisely. */ - if (opindex + 3 == ops.length ()) - swap_ops_for_binary_stmt (ops, opindex, stmt); - - /* The final recursion case for this function is that you have - exactly two operations left. - If we had one exactly one op in the entire list to start with, we - would have never called this function, and the tail recursion - rewrites them one at a time. */ - if (opindex + 2 == ops.length ()) - { - operand_entry_t oe1, oe2; - - oe1 = ops[opindex]; - oe2 = ops[opindex + 1]; - - if (rhs1 != oe1->op || rhs2 != oe2->op) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Transforming "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } - - gimple_assign_set_rhs1 (stmt, oe1->op); - gimple_assign_set_rhs2 (stmt, oe2->op); - update_stmt (stmt); - if (rhs1 != oe1->op && rhs1 != oe2->op) - remove_visited_stmt_chain (rhs1); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, " into "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } - } - return; - } - - /* If we hit here, we should have 3 or more ops left. */ - gcc_assert (opindex + 2 < ops.length ()); - - /* Rewrite the next operator. */ - oe = ops[opindex]; - - if (oe->op != rhs2) - { - if (!moved) - { - gimple_stmt_iterator gsinow, gsirhs1; - gimple stmt1 = stmt, stmt2; - unsigned int count; - - gsinow = gsi_for_stmt (stmt); - count = ops.length () - opindex - 2; - while (count-- != 0) - { - stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1)); - gsirhs1 = gsi_for_stmt (stmt2); - gsi_move_before (&gsirhs1, &gsinow); - gsi_prev (&gsinow); - stmt1 = stmt2; - } - moved = true; - } - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Transforming "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } - - gimple_assign_set_rhs2 (stmt, oe->op); - update_stmt (stmt); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, " into "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } - } - /* Recurse on the LHS of the binary operator, which is guaranteed to - be the non-leaf side. */ - rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved); -} - -/* Find out how many cycles we need to compute statements chain. - OPS_NUM holds number os statements in a chain. CPU_WIDTH is a - maximum number of independent statements we may execute per cycle. */ - -static int -get_required_cycles (int ops_num, int cpu_width) -{ - int res; - int elog; - unsigned int rest; - - /* While we have more than 2 * cpu_width operands - we may reduce number of operands by cpu_width - per cycle. */ - res = ops_num / (2 * cpu_width); - - /* Remained operands count may be reduced twice per cycle - until we have only one operand. */ - rest = (unsigned)(ops_num - res * cpu_width); - elog = exact_log2 (rest); - if (elog >= 0) - res += elog; - else - res += floor_log2 (rest) + 1; - - return res; -} - -/* Returns an optimal number of registers to use for computation of - given statements. */ - -static int -get_reassociation_width (int ops_num, enum tree_code opc, - enum machine_mode mode) -{ - int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); - int width; - int width_min; - int cycles_best; - - if (param_width > 0) - width = param_width; - else - width = targetm.sched.reassociation_width (opc, mode); - - if (width == 1) - return width; - - /* Get the minimal time required for sequence computation. */ - cycles_best = get_required_cycles (ops_num, width); - - /* Check if we may use less width and still compute sequence for - the same time. It will allow us to reduce registers usage. - get_required_cycles is monotonically increasing with lower width - so we can perform a binary search for the minimal width that still - results in the optimal cycle count. */ - width_min = 1; - while (width > width_min) - { - int width_mid = (width + width_min) / 2; - - if (get_required_cycles (ops_num, width_mid) == cycles_best) - width = width_mid; - else if (width_min < width_mid) - width_min = width_mid; - else - break; - } - - return width; -} - -/* Recursively rewrite our linearized statements so that the operators - match those in OPS[OPINDEX], putting the computation in rank - order and trying to allow operations to be executed in - parallel. */ - -static void -rewrite_expr_tree_parallel (gimple stmt, int width, - vec<operand_entry_t> ops) -{ - enum tree_code opcode = gimple_assign_rhs_code (stmt); - int op_num = ops.length (); - int stmt_num = op_num - 1; - gimple *stmts = XALLOCAVEC (gimple, stmt_num); - int op_index = op_num - 1; - int stmt_index = 0; - int ready_stmts_end = 0; - int i = 0; - tree last_rhs1 = gimple_assign_rhs1 (stmt); - - /* We start expression rewriting from the top statements. - So, in this loop we create a full list of statements - we will work with. */ - stmts[stmt_num - 1] = stmt; - for (i = stmt_num - 2; i >= 0; i--) - stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); - - for (i = 0; i < stmt_num; i++) - { - tree op1, op2; - - /* Determine whether we should use results of - already handled statements or not. */ - if (ready_stmts_end == 0 - && (i - stmt_index >= width || op_index < 1)) - ready_stmts_end = i; - - /* Now we choose operands for the next statement. Non zero - value in ready_stmts_end means here that we should use - the result of already generated statements as new operand. */ - if (ready_stmts_end > 0) - { - op1 = gimple_assign_lhs (stmts[stmt_index++]); - if (ready_stmts_end > stmt_index) - op2 = gimple_assign_lhs (stmts[stmt_index++]); - else if (op_index >= 0) - op2 = ops[op_index--]->op; - else - { - gcc_assert (stmt_index < i); - op2 = gimple_assign_lhs (stmts[stmt_index++]); - } - - if (stmt_index >= ready_stmts_end) - ready_stmts_end = 0; - } - else - { - if (op_index > 1) - swap_ops_for_binary_stmt (ops, op_index - 2, NULL); - op2 = ops[op_index--]->op; - op1 = ops[op_index--]->op; - } - - /* If we emit the last statement then we should put - operands into the last statement. It will also - break the loop. */ - if (op_index < 0 && stmt_index == i) - i = stmt_num - 1; - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Transforming "); - print_gimple_stmt (dump_file, stmts[i], 0, 0); - } - - /* We keep original statement only for the last one. All - others are recreated. */ - if (i == stmt_num - 1) - { - gimple_assign_set_rhs1 (stmts[i], op1); - gimple_assign_set_rhs2 (stmts[i], op2); - update_stmt (stmts[i]); - } - else - stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, " into "); - print_gimple_stmt (dump_file, stmts[i], 0, 0); - } - } - - remove_visited_stmt_chain (last_rhs1); -} - -/* Transform STMT, which is really (A +B) + (C + D) into the left - linear form, ((A+B)+C)+D. - Recurse on D if necessary. */ - -static void -linearize_expr (gimple stmt) -{ - gimple_stmt_iterator gsinow, gsirhs; - gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); - gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); - enum tree_code rhscode = gimple_assign_rhs_code (stmt); - gimple newbinrhs = NULL; - struct loop *loop = loop_containing_stmt (stmt); - - gcc_assert (is_reassociable_op (binlhs, rhscode, loop) - && is_reassociable_op (binrhs, rhscode, loop)); - - gsinow = gsi_for_stmt (stmt); - gsirhs = gsi_for_stmt (binrhs); - gsi_move_before (&gsirhs, &gsinow); - - gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); - gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs)); - gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); - - if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) - newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Linearized: "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } - - reassociate_stats.linearized++; - update_stmt (binrhs); - update_stmt (binlhs); - update_stmt (stmt); - - gimple_set_visited (stmt, true); - gimple_set_visited (binlhs, true); - gimple_set_visited (binrhs, true); - - /* Tail recurse on the new rhs if it still needs reassociation. */ - if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) - /* ??? This should probably be linearize_expr (newbinrhs) but I don't - want to change the algorithm while converting to tuples. */ - linearize_expr (stmt); -} - -/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return - it. Otherwise, return NULL. */ - -static gimple -get_single_immediate_use (tree lhs) -{ - use_operand_p immuse; - gimple immusestmt; - - if (TREE_CODE (lhs) == SSA_NAME - && single_imm_use (lhs, &immuse, &immusestmt) - && is_gimple_assign (immusestmt)) - return immusestmt; - - return NULL; -} - -/* Recursively negate the value of TONEGATE, and return the SSA_NAME - representing the negated value. Insertions of any necessary - instructions go before GSI. - This function is recursive in that, if you hand it "a_5" as the - value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will - transform b_3 + b_4 into a_5 = -b_3 + -b_4. */ - -static tree -negate_value (tree tonegate, gimple_stmt_iterator *gsi) -{ - gimple negatedefstmt= NULL; - tree resultofnegate; - - /* If we are trying to negate a name, defined by an add, negate the - add operands instead. */ - if (TREE_CODE (tonegate) == SSA_NAME) - negatedefstmt = SSA_NAME_DEF_STMT (tonegate); - if (TREE_CODE (tonegate) == SSA_NAME - && is_gimple_assign (negatedefstmt) - && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME - && has_single_use (gimple_assign_lhs (negatedefstmt)) - && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR) - { - gimple_stmt_iterator gsi; - tree rhs1 = gimple_assign_rhs1 (negatedefstmt); - tree rhs2 = gimple_assign_rhs2 (negatedefstmt); - - gsi = gsi_for_stmt (negatedefstmt); - rhs1 = negate_value (rhs1, &gsi); - gimple_assign_set_rhs1 (negatedefstmt, rhs1); - - gsi = gsi_for_stmt (negatedefstmt); - rhs2 = negate_value (rhs2, &gsi); - gimple_assign_set_rhs2 (negatedefstmt, rhs2); - - update_stmt (negatedefstmt); - return gimple_assign_lhs (negatedefstmt); - } - - tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); - resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true, - NULL_TREE, true, GSI_SAME_STMT); - return resultofnegate; -} - -/* Return true if we should break up the subtract in STMT into an add - with negate. This is true when we the subtract operands are really - adds, or the subtract itself is used in an add expression. In - either case, breaking up the subtract into an add with negate - exposes the adds to reassociation. */ - -static bool -should_break_up_subtract (gimple stmt) -{ - tree lhs = gimple_assign_lhs (stmt); - tree binlhs = gimple_assign_rhs1 (stmt); - tree binrhs = gimple_assign_rhs2 (stmt); - gimple immusestmt; - struct loop *loop = loop_containing_stmt (stmt); - - if (TREE_CODE (binlhs) == SSA_NAME - && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) - return true; - - if (TREE_CODE (binrhs) == SSA_NAME - && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) - return true; - - if (TREE_CODE (lhs) == SSA_NAME - && (immusestmt = get_single_immediate_use (lhs)) - && is_gimple_assign (immusestmt) - && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR - || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) - return true; - return false; -} - -/* Transform STMT from A - B into A + -B. */ - -static void -break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip) -{ - tree rhs1 = gimple_assign_rhs1 (stmt); - tree rhs2 = gimple_assign_rhs2 (stmt); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Breaking up subtract "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } - - rhs2 = negate_value (rhs2, gsip); - gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); - update_stmt (stmt); -} - -/* Determine whether STMT is a builtin call that raises an SSA name - to an integer power and has only one use. If so, and this is early - reassociation and unsafe math optimizations are permitted, place - the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE. - If any of these conditions does not hold, return FALSE. */ - -static bool -acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent) -{ - tree fndecl, arg1; - REAL_VALUE_TYPE c, cint; - - if (!first_pass_instance - || !flag_unsafe_math_optimizations - || !is_gimple_call (stmt) - || !has_single_use (gimple_call_lhs (stmt))) - return false; - - fndecl = gimple_call_fndecl (stmt); - - if (!fndecl - || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) - return false; - - switch (DECL_FUNCTION_CODE (fndecl)) - { - CASE_FLT_FN (BUILT_IN_POW): - *base = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - - if (TREE_CODE (arg1) != REAL_CST) - return false; - - c = TREE_REAL_CST (arg1); - - if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) - return false; - - *exponent = real_to_integer (&c); - real_from_integer (&cint, VOIDmode, *exponent, - *exponent < 0 ? -1 : 0, 0); - if (!real_identical (&c, &cint)) - return false; - - break; - - CASE_FLT_FN (BUILT_IN_POWI): - *base = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - - if (!host_integerp (arg1, 0)) - return false; - - *exponent = TREE_INT_CST_LOW (arg1); - break; - - default: - return false; - } - - /* Expanding negative exponents is generally unproductive, so we don't - complicate matters with those. Exponents of zero and one should - have been handled by expression folding. */ - if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) - return false; - - return true; -} - -/* Recursively linearize a binary expression that is the RHS of STMT. - Place the operands of the expression tree in the vector named OPS. */ - -static void -linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt, - bool is_associative, bool set_visited) -{ - tree binlhs = gimple_assign_rhs1 (stmt); - tree binrhs = gimple_assign_rhs2 (stmt); - gimple binlhsdef = NULL, binrhsdef = NULL; - bool binlhsisreassoc = false; - bool binrhsisreassoc = false; - enum tree_code rhscode = gimple_assign_rhs_code (stmt); - struct loop *loop = loop_containing_stmt (stmt); - tree base = NULL_TREE; - HOST_WIDE_INT exponent = 0; - - if (set_visited) - gimple_set_visited (stmt, true); - - if (TREE_CODE (binlhs) == SSA_NAME) - { - binlhsdef = SSA_NAME_DEF_STMT (binlhs); - binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) - && !stmt_could_throw_p (binlhsdef)); - } - - if (TREE_CODE (binrhs) == SSA_NAME) - { - binrhsdef = SSA_NAME_DEF_STMT (binrhs); - binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) - && !stmt_could_throw_p (binrhsdef)); - } - - /* If the LHS is not reassociable, but the RHS is, we need to swap - them. If neither is reassociable, there is nothing we can do, so - just put them in the ops vector. If the LHS is reassociable, - linearize it. If both are reassociable, then linearize the RHS - and the LHS. */ - - if (!binlhsisreassoc) - { - tree temp; - - /* If this is not a associative operation like division, give up. */ - if (!is_associative) - { - add_to_ops_vec (ops, binrhs); - return; - } - - if (!binrhsisreassoc) - { - if (rhscode == MULT_EXPR - && TREE_CODE (binrhs) == SSA_NAME - && acceptable_pow_call (binrhsdef, &base, &exponent)) - { - add_repeat_to_ops_vec (ops, base, exponent); - gimple_set_visited (binrhsdef, true); - } - else - add_to_ops_vec (ops, binrhs); - - if (rhscode == MULT_EXPR - && TREE_CODE (binlhs) == SSA_NAME - && acceptable_pow_call (binlhsdef, &base, &exponent)) - { - add_repeat_to_ops_vec (ops, base, exponent); - gimple_set_visited (binlhsdef, true); - } - else - add_to_ops_vec (ops, binlhs); - - return; - } - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "swapping operands of "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } - - swap_tree_operands (stmt, - gimple_assign_rhs1_ptr (stmt), - gimple_assign_rhs2_ptr (stmt)); - update_stmt (stmt); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, " is now "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } - - /* We want to make it so the lhs is always the reassociative op, - so swap. */ - temp = binlhs; - binlhs = binrhs; - binrhs = temp; - } - else if (binrhsisreassoc) - { - linearize_expr (stmt); - binlhs = gimple_assign_rhs1 (stmt); - binrhs = gimple_assign_rhs2 (stmt); - } - - gcc_assert (TREE_CODE (binrhs) != SSA_NAME - || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), - rhscode, loop)); - linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), - is_associative, set_visited); - - if (rhscode == MULT_EXPR - && TREE_CODE (binrhs) == SSA_NAME - && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent)) - { - add_repeat_to_ops_vec (ops, base, exponent); - gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true); - } - else - add_to_ops_vec (ops, binrhs); -} - -/* Repropagate the negates back into subtracts, since no other pass - currently does it. */ - -static void -repropagate_negates (void) -{ - unsigned int i = 0; - tree negate; - - FOR_EACH_VEC_ELT (plus_negates, i, negate) - { - gimple user = get_single_immediate_use (negate); - - if (!user || !is_gimple_assign (user)) - continue; - - /* The negate operand can be either operand of a PLUS_EXPR - (it can be the LHS if the RHS is a constant for example). - - Force the negate operand to the RHS of the PLUS_EXPR, then - transform the PLUS_EXPR into a MINUS_EXPR. */ - if (gimple_assign_rhs_code (user) == PLUS_EXPR) - { - /* If the negated operand appears on the LHS of the - PLUS_EXPR, exchange the operands of the PLUS_EXPR - to force the negated operand to the RHS of the PLUS_EXPR. */ - if (gimple_assign_rhs1 (user) == negate) - { - swap_tree_operands (user, - gimple_assign_rhs1_ptr (user), - gimple_assign_rhs2_ptr (user)); - } - - /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace - the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */ - if (gimple_assign_rhs2 (user) == negate) - { - tree rhs1 = gimple_assign_rhs1 (user); - tree rhs2 = get_unary_op (negate, NEGATE_EXPR); - gimple_stmt_iterator gsi = gsi_for_stmt (user); - gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2); - update_stmt (user); - } - } - else if (gimple_assign_rhs_code (user) == MINUS_EXPR) - { - if (gimple_assign_rhs1 (user) == negate) - { - /* We have - x = -a - y = x - b - which we transform into - x = a + b - y = -x . - This pushes down the negate which we possibly can merge - into some other operation, hence insert it into the - plus_negates vector. */ - gimple feed = SSA_NAME_DEF_STMT (negate); - tree a = gimple_assign_rhs1 (feed); - tree rhs2 = gimple_assign_rhs2 (user); - gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2; - gimple_replace_lhs (feed, negate); - gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2); - update_stmt (gsi_stmt (gsi)); - gsi2 = gsi_for_stmt (user); - gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL); - update_stmt (gsi_stmt (gsi2)); - gsi_move_before (&gsi, &gsi2); - plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2))); - } - else - { - /* Transform "x = -a; y = b - x" into "y = b + a", getting - rid of one operation. */ - gimple feed = SSA_NAME_DEF_STMT (negate); - tree a = gimple_assign_rhs1 (feed); - tree rhs1 = gimple_assign_rhs1 (user); - gimple_stmt_iterator gsi = gsi_for_stmt (user); - gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a); - update_stmt (gsi_stmt (gsi)); - } - } - } -} - -/* Returns true if OP is of a type for which we can do reassociation. - That is for integral or non-saturating fixed-point types, and for - floating point type when associative-math is enabled. */ - -static bool -can_reassociate_p (tree op) -{ - tree type = TREE_TYPE (op); - if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) - || NON_SAT_FIXED_POINT_TYPE_P (type) - || (flag_associative_math && FLOAT_TYPE_P (type))) - return true; - return false; -} - -/* Break up subtract operations in block BB. - - We do this top down because we don't know whether the subtract is - part of a possible chain of reassociation except at the top. - - IE given - d = f + g - c = a + e - b = c - d - q = b - r - k = t - q - - we want to break up k = t - q, but we won't until we've transformed q - = b - r, which won't be broken up until we transform b = c - d. - - En passant, clear the GIMPLE visited flag on every statement. */ - -static void -break_up_subtract_bb (basic_block bb) -{ - gimple_stmt_iterator gsi; - basic_block son; - - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple stmt = gsi_stmt (gsi); - gimple_set_visited (stmt, false); - - if (!is_gimple_assign (stmt) - || !can_reassociate_p (gimple_assign_lhs (stmt))) - continue; - - /* Look for simple gimple subtract operations. */ - if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) - { - if (!can_reassociate_p (gimple_assign_rhs1 (stmt)) - || !can_reassociate_p (gimple_assign_rhs2 (stmt))) - continue; - - /* Check for a subtract used only in an addition. If this - is the case, transform it into add of a negate for better - reassociation. IE transform C = A-B into C = A + -B if C - is only used in an addition. */ - if (should_break_up_subtract (stmt)) - break_up_subtract (stmt, &gsi); - } - else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR - && can_reassociate_p (gimple_assign_rhs1 (stmt))) - plus_negates.safe_push (gimple_assign_lhs (stmt)); - } - for (son = first_dom_son (CDI_DOMINATORS, bb); - son; - son = next_dom_son (CDI_DOMINATORS, son)) - break_up_subtract_bb (son); -} - -/* Used for repeated factor analysis. */ -struct repeat_factor_d -{ - /* An SSA name that occurs in a multiply chain. */ - tree factor; - - /* Cached rank of the factor. */ - unsigned rank; - - /* Number of occurrences of the factor in the chain. */ - HOST_WIDE_INT count; - - /* An SSA name representing the product of this factor and - all factors appearing later in the repeated factor vector. */ - tree repr; -}; - -typedef struct repeat_factor_d repeat_factor, *repeat_factor_t; -typedef const struct repeat_factor_d *const_repeat_factor_t; - - -static vec<repeat_factor> repeat_factor_vec; - -/* Used for sorting the repeat factor vector. Sort primarily by - ascending occurrence count, secondarily by descending rank. */ - -static int -compare_repeat_factors (const void *x1, const void *x2) -{ - const_repeat_factor_t rf1 = (const_repeat_factor_t) x1; - const_repeat_factor_t rf2 = (const_repeat_factor_t) x2; - - if (rf1->count != rf2->count) - return rf1->count - rf2->count; - - return rf2->rank - rf1->rank; -} - -/* Look for repeated operands in OPS in the multiply tree rooted at - STMT. Replace them with an optimal sequence of multiplies and powi - builtin calls, and remove the used operands from OPS. Return an - SSA name representing the value of the replacement sequence. */ - -static tree -attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops) -{ - unsigned i, j, vec_len; - int ii; - operand_entry_t oe; - repeat_factor_t rf1, rf2; - repeat_factor rfnew; - tree result = NULL_TREE; - tree target_ssa, iter_result; - tree type = TREE_TYPE (gimple_get_lhs (stmt)); - tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); - gimple_stmt_iterator gsi = gsi_for_stmt (stmt); - gimple mul_stmt, pow_stmt; - - /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and - target. */ - if (!powi_fndecl) - return NULL_TREE; - - /* Allocate the repeated factor vector. */ - repeat_factor_vec.create (10); - - /* Scan the OPS vector for all SSA names in the product and build - up a vector of occurrence counts for each factor. */ - FOR_EACH_VEC_ELT (*ops, i, oe) - { - if (TREE_CODE (oe->op) == SSA_NAME) - { - FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) - { - if (rf1->factor == oe->op) - { - rf1->count += oe->count; - break; - } - } - - if (j >= repeat_factor_vec.length ()) - { - rfnew.factor = oe->op; - rfnew.rank = oe->rank; - rfnew.count = oe->count; - rfnew.repr = NULL_TREE; - repeat_factor_vec.safe_push (rfnew); - } - } - } - - /* Sort the repeated factor vector by (a) increasing occurrence count, - and (b) decreasing rank. */ - repeat_factor_vec.qsort (compare_repeat_factors); - - /* It is generally best to combine as many base factors as possible - into a product before applying __builtin_powi to the result. - However, the sort order chosen for the repeated factor vector - allows us to cache partial results for the product of the base - factors for subsequent use. When we already have a cached partial - result from a previous iteration, it is best to make use of it - before looking for another __builtin_pow opportunity. - - As an example, consider x * x * y * y * y * z * z * z * z. - We want to first compose the product x * y * z, raise it to the - second power, then multiply this by y * z, and finally multiply - by z. This can be done in 5 multiplies provided we cache y * z - for use in both expressions: - - t1 = y * z - t2 = t1 * x - t3 = t2 * t2 - t4 = t1 * t3 - result = t4 * z - - If we instead ignored the cached y * z and first multiplied by - the __builtin_pow opportunity z * z, we would get the inferior: - - t1 = y * z - t2 = t1 * x - t3 = t2 * t2 - t4 = z * z - t5 = t3 * t4 - result = t5 * y */ - - vec_len = repeat_factor_vec.length (); - - /* Repeatedly look for opportunities to create a builtin_powi call. */ - while (true) - { - HOST_WIDE_INT power; - - /* First look for the largest cached product of factors from - preceding iterations. If found, create a builtin_powi for - it if the minimum occurrence count for its factors is at - least 2, or just use this cached product as our next - multiplicand if the minimum occurrence count is 1. */ - FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) - { - if (rf1->repr && rf1->count > 0) - break; - } - - if (j < vec_len) - { - power = rf1->count; - - if (power == 1) - { - iter_result = rf1->repr; - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - unsigned elt; - repeat_factor_t rf; - fputs ("Multiplying by cached product ", dump_file); - for (elt = j; elt < vec_len; elt++) - { - rf = &repeat_factor_vec[elt]; - print_generic_expr (dump_file, rf->factor, 0); - if (elt < vec_len - 1) - fputs (" * ", dump_file); - } - fputs ("\n", dump_file); - } - } - else - { - iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); - pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, - build_int_cst (integer_type_node, - power)); - gimple_call_set_lhs (pow_stmt, iter_result); - gimple_set_location (pow_stmt, gimple_location (stmt)); - gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - unsigned elt; - repeat_factor_t rf; - fputs ("Building __builtin_pow call for cached product (", - dump_file); - for (elt = j; elt < vec_len; elt++) - { - rf = &repeat_factor_vec[elt]; - print_generic_expr (dump_file, rf->factor, 0); - if (elt < vec_len - 1) - fputs (" * ", dump_file); - } - fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", - power); - } - } - } - else - { - /* Otherwise, find the first factor in the repeated factor - vector whose occurrence count is at least 2. If no such - factor exists, there are no builtin_powi opportunities - remaining. */ - FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) - { - if (rf1->count >= 2) - break; - } - - if (j >= vec_len) - break; - - power = rf1->count; - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - unsigned elt; - repeat_factor_t rf; - fputs ("Building __builtin_pow call for (", dump_file); - for (elt = j; elt < vec_len; elt++) - { - rf = &repeat_factor_vec[elt]; - print_generic_expr (dump_file, rf->factor, 0); - if (elt < vec_len - 1) - fputs (" * ", dump_file); - } - fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power); - } - - reassociate_stats.pows_created++; - - /* Visit each element of the vector in reverse order (so that - high-occurrence elements are visited first, and within the - same occurrence count, lower-ranked elements are visited - first). Form a linear product of all elements in this order - whose occurrencce count is at least that of element J. - Record the SSA name representing the product of each element - with all subsequent elements in the vector. */ - if (j == vec_len - 1) - rf1->repr = rf1->factor; - else - { - for (ii = vec_len - 2; ii >= (int)j; ii--) - { - tree op1, op2; - - rf1 = &repeat_factor_vec[ii]; - rf2 = &repeat_factor_vec[ii + 1]; - - /* Init the last factor's representative to be itself. */ - if (!rf2->repr) - rf2->repr = rf2->factor; - - op1 = rf1->factor; - op2 = rf2->repr; - - target_ssa = make_temp_ssa_name (type, NULL, "reassocpow"); - mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, - target_ssa, - op1, op2); - gimple_set_location (mul_stmt, gimple_location (stmt)); - gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); - rf1->repr = target_ssa; - - /* Don't reprocess the multiply we just introduced. */ - gimple_set_visited (mul_stmt, true); - } - } - - /* Form a call to __builtin_powi for the maximum product - just formed, raised to the power obtained earlier. */ - rf1 = &repeat_factor_vec[j]; - iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); - pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, - build_int_cst (integer_type_node, - power)); - gimple_call_set_lhs (pow_stmt, iter_result); - gimple_set_location (pow_stmt, gimple_location (stmt)); - gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); - } - - /* If we previously formed at least one other builtin_powi call, - form the product of this one and those others. */ - if (result) - { - tree new_result = make_temp_ssa_name (type, NULL, "reassocpow"); - mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result, - result, iter_result); - gimple_set_location (mul_stmt, gimple_location (stmt)); - gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); - gimple_set_visited (mul_stmt, true); - result = new_result; - } - else - result = iter_result; - - /* Decrement the occurrence count of each element in the product - by the count found above, and remove this many copies of each - factor from OPS. */ - for (i = j; i < vec_len; i++) - { - unsigned k = power; - unsigned n; - - rf1 = &repeat_factor_vec[i]; - rf1->count -= power; - - FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe) - { - if (oe->op == rf1->factor) - { - if (oe->count <= k) - { - ops->ordered_remove (n); - k -= oe->count; - - if (k == 0) - break; - } - else - { - oe->count -= k; - break; - } - } - } - } - } - - /* At this point all elements in the repeated factor vector have a - remaining occurrence count of 0 or 1, and those with a count of 1 - don't have cached representatives. Re-sort the ops vector and - clean up. */ - ops->qsort (sort_by_operand_rank); - repeat_factor_vec.release (); - - /* Return the final product computed herein. Note that there may - still be some elements with single occurrence count left in OPS; - those will be handled by the normal reassociation logic. */ - return result; -} - -/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */ - -static void -transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs) -{ - tree rhs1; - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Transforming "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } - - rhs1 = gimple_assign_rhs1 (stmt); - gimple_assign_set_rhs_from_tree (gsi, new_rhs); - update_stmt (stmt); - remove_visited_stmt_chain (rhs1); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, " into "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } -} - -/* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */ - -static void -transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt, - tree rhs1, tree rhs2) -{ - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Transforming "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } - - gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2); - update_stmt (gsi_stmt (*gsi)); - remove_visited_stmt_chain (rhs1); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, " into "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } -} - -/* Reassociate expressions in basic block BB and its post-dominator as - children. */ - -static void -reassociate_bb (basic_block bb) -{ - gimple_stmt_iterator gsi; - basic_block son; - gimple stmt = last_stmt (bb); - - if (stmt && !gimple_visited_p (stmt)) - maybe_optimize_range_tests (stmt); - - for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) - { - stmt = gsi_stmt (gsi); - - if (is_gimple_assign (stmt) - && !stmt_could_throw_p (stmt)) - { - tree lhs, rhs1, rhs2; - enum tree_code rhs_code = gimple_assign_rhs_code (stmt); - - /* If this is not a gimple binary expression, there is - nothing for us to do with it. */ - if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) - continue; - - /* If this was part of an already processed statement, - we don't need to touch it again. */ - if (gimple_visited_p (stmt)) - { - /* This statement might have become dead because of previous - reassociations. */ - if (has_zero_uses (gimple_get_lhs (stmt))) - { - gsi_remove (&gsi, true); - release_defs (stmt); - /* We might end up removing the last stmt above which - places the iterator to the end of the sequence. - Reset it to the last stmt in this case which might - be the end of the sequence as well if we removed - the last statement of the sequence. In which case - we need to bail out. */ - if (gsi_end_p (gsi)) - { - gsi = gsi_last_bb (bb); - if (gsi_end_p (gsi)) - break; - } - } - continue; - } - - lhs = gimple_assign_lhs (stmt); - rhs1 = gimple_assign_rhs1 (stmt); - rhs2 = gimple_assign_rhs2 (stmt); - - /* For non-bit or min/max operations we can't associate - all types. Verify that here. */ - if (rhs_code != BIT_IOR_EXPR - && rhs_code != BIT_AND_EXPR - && rhs_code != BIT_XOR_EXPR - && rhs_code != MIN_EXPR - && rhs_code != MAX_EXPR - && (!can_reassociate_p (lhs) - || !can_reassociate_p (rhs1) - || !can_reassociate_p (rhs2))) - continue; - - if (associative_tree_code (rhs_code)) - { - vec<operand_entry_t> ops = vNULL; - tree powi_result = NULL_TREE; - - /* There may be no immediate uses left by the time we - get here because we may have eliminated them all. */ - if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs)) - continue; - - gimple_set_visited (stmt, true); - linearize_expr_tree (&ops, stmt, true, true); - ops.qsort (sort_by_operand_rank); - optimize_ops_list (rhs_code, &ops); - if (undistribute_ops_list (rhs_code, &ops, - loop_containing_stmt (stmt))) - { - ops.qsort (sort_by_operand_rank); - optimize_ops_list (rhs_code, &ops); - } - - if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) - optimize_range_tests (rhs_code, &ops); - - if (first_pass_instance - && rhs_code == MULT_EXPR - && flag_unsafe_math_optimizations) - powi_result = attempt_builtin_powi (stmt, &ops); - - /* If the operand vector is now empty, all operands were - consumed by the __builtin_powi optimization. */ - if (ops.length () == 0) - transform_stmt_to_copy (&gsi, stmt, powi_result); - else if (ops.length () == 1) - { - tree last_op = ops.last ()->op; - - if (powi_result) - transform_stmt_to_multiply (&gsi, stmt, last_op, - powi_result); - else - transform_stmt_to_copy (&gsi, stmt, last_op); - } - else - { - enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); - int ops_num = ops.length (); - int width = get_reassociation_width (ops_num, rhs_code, mode); - - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, - "Width = %d was chosen for reassociation\n", width); - - if (width > 1 - && ops.length () > 3) - rewrite_expr_tree_parallel (stmt, width, ops); - else - rewrite_expr_tree (stmt, 0, ops, false); - - /* If we combined some repeated factors into a - __builtin_powi call, multiply that result by the - reassociated operands. */ - if (powi_result) - { - gimple mul_stmt; - tree type = TREE_TYPE (gimple_get_lhs (stmt)); - tree target_ssa = make_temp_ssa_name (type, NULL, - "reassocpow"); - gimple_set_lhs (stmt, target_ssa); - update_stmt (stmt); - mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs, - powi_result, - target_ssa); - gimple_set_location (mul_stmt, gimple_location (stmt)); - gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT); - } - } - - ops.release (); - } - } - } - for (son = first_dom_son (CDI_POST_DOMINATORS, bb); - son; - son = next_dom_son (CDI_POST_DOMINATORS, son)) - reassociate_bb (son); -} - -void dump_ops_vector (FILE *file, vec<operand_entry_t> ops); -void debug_ops_vector (vec<operand_entry_t> ops); - -/* Dump the operand entry vector OPS to FILE. */ - -void -dump_ops_vector (FILE *file, vec<operand_entry_t> ops) -{ - operand_entry_t oe; - unsigned int i; - - FOR_EACH_VEC_ELT (ops, i, oe) - { - fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank); - print_generic_expr (file, oe->op, 0); - } -} - -/* Dump the operand entry vector OPS to STDERR. */ - -DEBUG_FUNCTION void -debug_ops_vector (vec<operand_entry_t> ops) -{ - dump_ops_vector (stderr, ops); -} - -static void -do_reassoc (void) -{ - break_up_subtract_bb (ENTRY_BLOCK_PTR); - reassociate_bb (EXIT_BLOCK_PTR); -} - -/* Initialize the reassociation pass. */ - -static void -init_reassoc (void) -{ - int i; - long rank = 2; - int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS); - - /* Find the loops, so that we can prevent moving calculations in - them. */ - loop_optimizer_init (AVOID_CFG_MODIFICATIONS); - - memset (&reassociate_stats, 0, sizeof (reassociate_stats)); - - operand_entry_pool = create_alloc_pool ("operand entry pool", - sizeof (struct operand_entry), 30); - next_operand_entry_id = 0; - - /* Reverse RPO (Reverse Post Order) will give us something where - deeper loops come later. */ - pre_and_rev_post_order_compute (NULL, bbs, false); - bb_rank = XCNEWVEC (long, last_basic_block); - operand_rank = pointer_map_create (); - - /* Give each default definition a distinct rank. This includes - parameters and the static chain. Walk backwards over all - SSA names so that we get proper rank ordering according - to tree_swap_operands_p. */ - for (i = num_ssa_names - 1; i > 0; --i) - { - tree name = ssa_name (i); - if (name && SSA_NAME_IS_DEFAULT_DEF (name)) - insert_operand_rank (name, ++rank); - } - - /* Set up rank for each BB */ - for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++) - bb_rank[bbs[i]] = ++rank << 16; - - free (bbs); - calculate_dominance_info (CDI_POST_DOMINATORS); - plus_negates = vNULL; -} - -/* Cleanup after the reassociation pass, and print stats if - requested. */ - -static void -fini_reassoc (void) -{ - statistics_counter_event (cfun, "Linearized", - reassociate_stats.linearized); - statistics_counter_event (cfun, "Constants eliminated", - reassociate_stats.constants_eliminated); - statistics_counter_event (cfun, "Ops eliminated", - reassociate_stats.ops_eliminated); - statistics_counter_event (cfun, "Statements rewritten", - reassociate_stats.rewritten); - statistics_counter_event (cfun, "Built-in pow[i] calls encountered", - reassociate_stats.pows_encountered); - statistics_counter_event (cfun, "Built-in powi calls created", - reassociate_stats.pows_created); - - pointer_map_destroy (operand_rank); - free_alloc_pool (operand_entry_pool); - free (bb_rank); - plus_negates.release (); - free_dominance_info (CDI_POST_DOMINATORS); - loop_optimizer_finalize (); -} - -/* Gate and execute functions for Reassociation. */ - -static unsigned int -execute_reassoc (void) -{ - init_reassoc (); - - do_reassoc (); - repropagate_negates (); - - fini_reassoc (); - return 0; -} - -static bool -gate_tree_ssa_reassoc (void) -{ - return flag_tree_reassoc != 0; -} - -struct gimple_opt_pass pass_reassoc = -{ - { - GIMPLE_PASS, - "reassoc", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_tree_ssa_reassoc, /* gate */ - execute_reassoc, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_TREE_REASSOC, /* tv_id */ - PROP_cfg | PROP_ssa, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_verify_ssa - | TODO_update_ssa_only_virtuals - | TODO_verify_flow - | TODO_ggc_collect /* todo_flags_finish */ - } -}; |