From 4eb84d2bc49c3e210e30683186b4f9272994f026 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Date: Tue, 18 Aug 2015 13:07:08 -0500 Subject: Cherry-pick: PR tree-optimization/65447 commit ad478851f7dd45438a9b33ecd7f30a4e6ab00388 Author: amker Date: Wed May 20 05:15:56 2015 +0000 PR tree-optimization/65447 * tree-ssa-loop-ivopts.c (struct iv_use): New fields. (dump_use, dump_uses): Support to dump sub use. (record_use): New parameters to support sub use. Remove call to dump_use. (record_sub_use, record_group_use): New functions. (compute_max_addr_offset, split_all_small_groups): New functions. (group_address_uses, rewrite_use_address): New functions. (strip_offset): New declaration. (find_interesting_uses_address): Call record_group_use. (add_candidate): New assertion. (infinite_cost_p): Move definition forward. (add_costs): Check INFTY cost and return immediately. (get_computation_cost_at): Clear setup cost and dependent bitmap for sub uses. (determine_use_iv_cost_address): Compute cost for sub uses. (rewrite_use_address_1): Rename from old rewrite_use_address. (free_loop_data): Free sub uses. (tree_ssa_iv_optimize_loop): Call group_address_uses. gcc/testsuite PR tree-optimization/65447 * gcc.dg/tree-ssa/pr65447.c: New test. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@223433 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c | 54 ++++ gcc-4.9/gcc/tree-ssa-loop-ivopts.c | 378 ++++++++++++++++++++++-- 2 files changed, 412 insertions(+), 20 deletions(-) create mode 100644 gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c (limited to 'gcc-4.9') diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c new file mode 100644 index 000000000..c5bddbfba --- /dev/null +++ b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ + +void foo (double *p) +{ + int i; + for (i = -20000; i < 200000; i+= 40) + { + p[i+0] = 1.0; + p[i+1] = 1.0; + p[i+2] = 1.0; + p[i+3] = 1.0; + p[i+4] = 1.0; + p[i+5] = 1.0; + p[i+6] = 1.0; + p[i+7] = 1.0; + p[i+8] = 1.0; + p[i+9] = 1.0; + p[i+10] = 1.0; + p[i+11] = 1.0; + p[i+12] = 1.0; + p[i+13] = 1.0; + p[i+14] = 1.0; + p[i+15] = 1.0; + p[i+16] = 1.0; + p[i+17] = 1.0; + p[i+18] = 1.0; + p[i+19] = 1.0; + p[i+20] = 1.0; + p[i+21] = 1.0; + p[i+22] = 1.0; + p[i+23] = 1.0; + p[i+24] = 1.0; + p[i+25] = 1.0; + p[i+26] = 1.0; + p[i+27] = 1.0; + p[i+28] = 1.0; + p[i+29] = 1.0; + p[i+30] = 1.0; + p[i+31] = 1.0; + p[i+32] = 1.0; + p[i+33] = 1.0; + p[i+34] = 1.0; + p[i+35] = 1.0; + p[i+36] = 1.0; + p[i+37] = 1.0; + p[i+38] = 1.0; + p[i+39] = 1.0; + } +} + +/* We should groups address type IV uses. */ +/* { dg-final { scan-tree-dump-not "\\nuse 2\\n" "ivopts" } } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ diff --git a/gcc-4.9/gcc/tree-ssa-loop-ivopts.c b/gcc-4.9/gcc/tree-ssa-loop-ivopts.c index c5a5dd48a..400187964 100644 --- a/gcc-4.9/gcc/tree-ssa-loop-ivopts.c +++ b/gcc-4.9/gcc/tree-ssa-loop-ivopts.c @@ -197,6 +197,7 @@ struct cost_pair struct iv_use { unsigned id; /* The id of the use. */ + unsigned sub_id; /* The id of the sub use. */ enum use_type type; /* Type of the use. 
*/ struct iv *iv; /* The induction variable it is based on. */ gimple stmt; /* Statement in that it occurs. */ @@ -210,6 +211,11 @@ struct iv_use struct iv_cand *selected; /* The selected candidate. */ + + struct iv_use *next; /* The next sub use. */ + tree addr_base; /* Base address with const offset stripped. */ + unsigned HOST_WIDE_INT addr_offset; + /* Const offset stripped from base address. */ }; /* The position where the iv is computed. */ @@ -522,7 +528,11 @@ dump_iv (FILE *file, struct iv *iv) void dump_use (FILE *file, struct iv_use *use) { - fprintf (file, "use %d\n", use->id); + fprintf (file, "use %d", use->id); + if (use->sub_id) + fprintf (file, ".%d", use->sub_id); + + fprintf (file, "\n"); switch (use->type) { @@ -571,8 +581,12 @@ dump_uses (FILE *file, struct ivopts_data *data) for (i = 0; i < n_iv_uses (data); i++) { use = iv_use (data, i); - - dump_use (file, use); + do + { + dump_use (file, use); + use = use->next; + } + while (use); fprintf (file, "\n"); } } @@ -1229,33 +1243,88 @@ find_induction_variables (struct ivopts_data *data) return true; } -/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */ +/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. + For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET + is the const offset stripped from IV base. For uses of other types, + ADDR_BASE and ADDR_OFFSET are zero by default. 
*/ static struct iv_use * record_use (struct ivopts_data *data, tree *use_p, struct iv *iv, - gimple stmt, enum use_type use_type) + gimple stmt, enum use_type use_type, tree addr_base = NULL, + unsigned HOST_WIDE_INT addr_offset = 0) { struct iv_use *use = XCNEW (struct iv_use); use->id = n_iv_uses (data); + use->sub_id = 0; use->type = use_type; use->iv = iv; use->stmt = stmt; use->op_p = use_p; use->related_cands = BITMAP_ALLOC (NULL); + use->next = NULL; + use->addr_base = addr_base; + use->addr_offset = addr_offset; /* To avoid showing ssa name in the dumps, if it was not reset by the caller. */ iv->ssa_name = NULL_TREE; - if (dump_file && (dump_flags & TDF_DETAILS)) - dump_use (dump_file, use); - data->iv_uses.safe_push (use); return use; } +/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV. + The sub use is recorded under the one whose use id is ID_GROUP. */ + +static struct iv_use * +record_sub_use (struct ivopts_data *data, tree *use_p, + struct iv *iv, gimple stmt, enum use_type use_type, + tree addr_base, unsigned HOST_WIDE_INT addr_offset, + unsigned int id_group) +{ + struct iv_use *use = XCNEW (struct iv_use); + struct iv_use *group = iv_use (data, id_group); + + use->id = group->id; + use->sub_id = 0; + use->type = use_type; + use->iv = iv; + use->stmt = stmt; + use->op_p = use_p; + use->related_cands = NULL; + use->addr_base = addr_base; + use->addr_offset = addr_offset; + + /* Sub use list is maintained in offset ascending order. */ + if (addr_offset <= group->addr_offset) + { + use->related_cands = group->related_cands; + group->related_cands = NULL; + use->next = group; + data->iv_uses[id_group] = use; + } + else + { + struct iv_use *pre; + do + { + pre = group; + group = group->next; + } + while (group && addr_offset > group->addr_offset); + use->next = pre->next; + pre->next = use; + } + + /* To avoid showing ssa name in the dumps, if it was not reset by the + caller. 
*/ + iv->ssa_name = NULL_TREE; + + return use; +} + /* Checks whether OP is a loop-level invariant and if so, records it. NONLINEAR_USE is true if the invariant is used in a way we do not handle specially. */ @@ -1739,6 +1808,50 @@ may_be_nonaddressable_p (tree expr) return false; } +static tree +strip_offset (tree expr, unsigned HOST_WIDE_INT *offset); + +/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV. + If there is an existing use which has same stripped iv base and step, + this function records this one as a sub use to that; otherwise records + it as a normal one. */ + +static struct iv_use * +record_group_use (struct ivopts_data *data, tree *use_p, + struct iv *iv, gimple stmt, enum use_type use_type) +{ + unsigned int i; + struct iv_use *use; + tree addr_base; + unsigned HOST_WIDE_INT addr_offset; + + /* Only support sub use for address type uses, that is, with base + object. */ + if (!iv->base_object) + return record_use (data, use_p, iv, stmt, use_type); + + addr_base = strip_offset (iv->base, &addr_offset); + for (i = 0; i < n_iv_uses (data); i++) + { + use = iv_use (data, i); + if (use->type != USE_ADDRESS || !use->iv->base_object) + continue; + + /* Check if it has the same stripped base and step. */ + if (operand_equal_p (iv->base_object, use->iv->base_object, 0) + && operand_equal_p (iv->step, use->iv->step, 0) + && operand_equal_p (addr_base, use->addr_base, 0)) + break; + } + + if (i == n_iv_uses (data)) + return record_use (data, use_p, iv, stmt, + use_type, addr_base, addr_offset); + else + return record_sub_use (data, use_p, iv, stmt, + use_type, addr_base, addr_offset, i); +} + /* Finds addresses in *OP_P inside STMT. 
*/ static void @@ -1849,7 +1962,7 @@ find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p } civ = alloc_iv (base, step); - record_use (data, op_p, civ, stmt, USE_ADDRESS); + record_group_use (data, op_p, civ, stmt, USE_ADDRESS); return; fail: @@ -2035,6 +2148,172 @@ find_interesting_uses (struct ivopts_data *data) free (body); } +/* Compute maximum offset of [base + offset] addressing mode + for memory reference represented by USE. */ + +static HOST_WIDE_INT +compute_max_addr_offset (struct iv_use *use) +{ + int width; + rtx reg, addr; + HOST_WIDE_INT i, off; + unsigned list_index, num; + addr_space_t as; + machine_mode mem_mode, addr_mode; + static vec max_offset_list; + + as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base)); + mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p)); + + num = max_offset_list.length (); + list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode; + if (list_index >= num) + { + max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE); + for (; num < max_offset_list.length (); num++) + max_offset_list[num] = -1; + } + + off = max_offset_list[list_index]; + if (off != -1) + return off; + + addr_mode = targetm.addr_space.address_mode (as); + reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1); + addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX); + + width = GET_MODE_BITSIZE (addr_mode) - 1; + if (width > (HOST_BITS_PER_WIDE_INT - 1)) + width = HOST_BITS_PER_WIDE_INT - 1; + + for (i = width; i > 0; i--) + { + off = ((unsigned HOST_WIDE_INT) 1 << i) - 1; + XEXP (addr, 1) = gen_int_mode (off, addr_mode); + if (memory_address_addr_space_p (mem_mode, addr, as)) + break; + + /* For some strict-alignment targets, the offset must be naturally + aligned. Try an aligned offset if mem_mode is not QImode. 
*/ + off = ((unsigned HOST_WIDE_INT) 1 << i); + if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode) + { + off -= GET_MODE_SIZE (mem_mode); + XEXP (addr, 1) = gen_int_mode (off, addr_mode); + if (memory_address_addr_space_p (mem_mode, addr, as)) + break; + } + } + if (i == 0) + off = 0; + + max_offset_list[list_index] = off; + return off; +} + +/* Check if all small groups should be split. Return true if and + only if: + + 1) At least one groups contain two uses with different offsets. + 2) No group contains more than two uses with different offsets. + + Return false otherwise. We want to split such groups because: + + 1) Small groups don't have much benefit and may interfer with + general candidate selection. + 2) Size for problem with only small groups is usually small and + general algorithm can handle it well. + + TODO -- Above claim may not hold when auto increment is supported. */ + +static bool +split_all_small_groups (struct ivopts_data *data) +{ + bool split_p = false; + unsigned int i, n, distinct; + struct iv_use *pre, *use; + + n = n_iv_uses (data); + for (i = 0; i < n; i++) + { + use = iv_use (data, i); + if (!use->next) + continue; + + distinct = 1; + gcc_assert (use->type == USE_ADDRESS); + for (pre = use, use = use->next; use; pre = use, use = use->next) + { + if (pre->addr_offset != use->addr_offset) + distinct++; + + if (distinct > 2) + return false; + } + if (distinct == 2) + split_p = true; + } + + return split_p; +} + +/* For each group of address type uses, this function further groups + these uses according to the maximum offset supported by target's + [base + offset] addressing mode. */ + +static void +group_address_uses (struct ivopts_data *data) +{ + HOST_WIDE_INT max_offset = -1; + unsigned int i, n, sub_id; + struct iv_use *pre, *use; + unsigned HOST_WIDE_INT addr_offset_first; + + /* Reset max offset to split all small groups. 
*/ + if (split_all_small_groups (data)) + max_offset = 0; + + n = n_iv_uses (data); + for (i = 0; i < n; i++) + { + use = iv_use (data, i); + if (!use->next) + continue; + + gcc_assert (use->type == USE_ADDRESS); + if (max_offset != 0) + max_offset = compute_max_addr_offset (use); + + while (use) + { + sub_id = 0; + addr_offset_first = use->addr_offset; + /* Only uses with offset that can fit in offset part against + the first use can be grouped together. */ + for (pre = use, use = use->next; + use && (use->addr_offset - addr_offset_first + <= (unsigned HOST_WIDE_INT) max_offset); + pre = use, use = use->next) + { + use->id = pre->id; + use->sub_id = ++sub_id; + } + + /* Break the list and create new group. */ + if (use) + { + pre->next = NULL; + use->id = n_iv_uses (data); + use->related_cands = BITMAP_ALLOC (NULL); + data->iv_uses.safe_push (use); + } + } + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + dump_uses (dump_file, data); +} + /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR is true, assume we are inside an address. If TOP_COMPREF is true, assume we are at the top-level of the processed address. */ @@ -2458,6 +2737,8 @@ static void add_candidate (struct ivopts_data *data, tree base, tree step, bool important, struct iv_use *use) { + gcc_assert (use == NULL || use->sub_id == 0); + if (ip_normal_pos (data->current_loop)) add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL); if (ip_end_pos (data->current_loop) @@ -2687,11 +2968,22 @@ new_cost (unsigned runtime, unsigned complexity) return cost; } +/* Returns true if COST is infinite. */ + +static bool +infinite_cost_p (comp_cost cost) +{ + return cost.cost == INFTY; +} + /* Adds costs COST1 and COST2. 
*/ static comp_cost add_costs (comp_cost cost1, comp_cost cost2) { + if (infinite_cost_p (cost1) || infinite_cost_p (cost2)) + return infinite_cost; + cost1.cost += cost2.cost; cost1.complexity += cost2.complexity; @@ -2720,14 +3012,6 @@ compare_costs (comp_cost cost1, comp_cost cost2) return cost1.cost - cost2.cost; } -/* Returns true if COST is infinite. */ - -static bool -infinite_cost_p (comp_cost cost) -{ - return cost.cost == INFTY; -} - /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends on invariants DEPENDS_ON and that the value used in expressing it is VALUE, and in case of iv elimination the comparison operator is COMP. */ @@ -4204,7 +4488,15 @@ get_computation_cost_at (struct ivopts_data *data, cost.cost += add_cost (data->speed, TYPE_MODE (ctype)); } - if (inv_expr_id) + /* Set of invariants depended on by sub use has already been computed + for the first use in the group. */ + if (use->sub_id) + { + cost.cost = 0; + if (depends_on && *depends_on) + bitmap_clear (*depends_on); + } + else if (inv_expr_id) { *inv_expr_id = get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p); @@ -4333,6 +4625,8 @@ determine_use_iv_cost_address (struct ivopts_data *data, bitmap depends_on; bool can_autoinc; int inv_expr_id = -1; + struct iv_use *sub_use; + comp_cost sub_cost; comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on, &can_autoinc, &inv_expr_id); @@ -4346,6 +4640,15 @@ determine_use_iv_cost_address (struct ivopts_data *data, else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE) cost = infinite_cost; } + for (sub_use = use->next; + sub_use && !infinite_cost_p (cost); + sub_use = sub_use->next) + { + sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on, + &can_autoinc, &inv_expr_id); + cost = add_costs (cost, sub_cost); + } + set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK, inv_expr_id); @@ -6533,8 +6836,8 @@ adjust_iv_update_pos (struct iv_cand *cand, 
struct iv_use *use) /* Rewrites USE (address that is an iv) using candidate CAND. */ static void -rewrite_use_address (struct ivopts_data *data, - struct iv_use *use, struct iv_cand *cand) +rewrite_use_address_1 (struct ivopts_data *data, + struct iv_use *use, struct iv_cand *cand) { aff_tree aff; gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); @@ -6569,6 +6872,28 @@ rewrite_use_address (struct ivopts_data *data, *use->op_p = ref; } +/* Rewrites USE (address that is an iv) using candidate CAND. If it's the + first use of a group, rewrites sub uses in the group too. */ + +static void +rewrite_use_address (struct ivopts_data *data, + struct iv_use *use, struct iv_cand *cand) +{ + struct iv_use *next; + + gcc_assert (use->sub_id == 0); + rewrite_use_address_1 (data, use, cand); + update_stmt (use->stmt); + + for (next = use->next; next != NULL; next = next->next) + { + rewrite_use_address_1 (data, next, cand); + update_stmt (next->stmt); + } + + return; +} + /* Rewrites USE (the condition such that one of the arguments is an iv) using candidate CAND. */ @@ -6845,6 +7170,18 @@ free_loop_data (struct ivopts_data *data) for (i = 0; i < n_iv_uses (data); i++) { struct iv_use *use = iv_use (data, i); + struct iv_use *pre = use, *sub = use->next; + + while (sub) + { + gcc_assert (sub->related_cands == NULL); + gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL); + + free (sub->iv); + pre = sub; + sub = sub->next; + free (pre); + } free (use->iv); BITMAP_FREE (use->related_cands); @@ -6964,6 +7301,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop) /* Finds interesting uses (item 1). 
*/ find_interesting_uses (data); + group_address_uses (data); if (n_iv_uses (data) > MAX_CONSIDERED_USES) goto finish; -- cgit v1.2.3 From 56c1a4fd4bb668f7981497688c4feae55d524aac Mon Sep 17 00:00:00 2001 From: Aditya Kumar Date: Tue, 18 Aug 2015 13:27:54 -0500 Subject: PR tree-optimization/52563, tree-optimization/62173 commit 5fe66b3cf99994fd9c8c68cea43aa1cf42eaa76d Author: amker Date: Tue Jun 2 03:33:35 2015 +0000 PR tree-optimization/52563 PR tree-optimization/62173 * tree-ssa-loop-ivopts.c (struct iv): New field. Reorder fields. (alloc_iv, set_iv): New parameter. (determine_biv_step): Delete. (find_bivs): Inline original determine_biv_step. Pass new argument to set_iv. (idx_find_step): Use no_overflow information for conversion. * tree-scalar-evolution.c (analyze_scalar_evolution_in_loop): Let resolve_mixers handle folded_casts. (instantiate_scev_name): Change bool parameter to bool pointer. (instantiate_scev_poly, instantiate_scev_binary): Ditto. (instantiate_array_ref, instantiate_scev_not): Ditto. (instantiate_scev_3, instantiate_scev_2): Ditto. (instantiate_scev_1, instantiate_scev_r): Ditto. (instantiate_scev_convert, ): Change parameter. Pass argument to chrec_convert_aggressive. (instantiate_scev): Change argument. (resolve_mixers): New parameter and set it. (scev_const_prop): New argument. * tree-scalar-evolution.h (resolve_mixers): New parameter. * tree-chrec.c (convert_affine_scev): Call chrec_convert instead of chrec_conert_1. (chrec_convert): New parameter. Move definition below. (chrec_convert_aggressive): New parameter and set it. Call convert_affine_scev. * tree-chrec.h (chrec_convert): New parameter. (chrec_convert_aggressive): Ditto. gcc/testsuite/ChangeLog PR tree-optimization/52563 PR tree-optimization/62173 * gcc.dg/tree-ssa/scev-3.c: Remove xfail. * gcc.dg/tree-ssa/scev-4.c: Ditto. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@224009 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c | 2 +- gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c | 2 +- gcc-4.9/gcc/tree-chrec.c | 104 +++++++++++-------- gcc-4.9/gcc/tree-chrec.h | 4 +- gcc-4.9/gcc/tree-scalar-evolution.c | 138 ++++++++++++++----------- gcc-4.9/gcc/tree-scalar-evolution.h | 2 +- gcc-4.9/gcc/tree-ssa-loop-ivopts.c | 58 +++++------ 7 files changed, 171 insertions(+), 139 deletions(-) (limited to 'gcc-4.9') diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c index 5cac1cefb..28d5c9327 100644 --- a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c +++ b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c @@ -14,5 +14,5 @@ f(int k) } } -/* { dg-final { scan-tree-dump-times "&a" 1 "optimized" { xfail { lp64 || llp64 } } } } */ +/* { dg-final { scan-tree-dump-times "&a" 1 "optimized" } } */ /* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c index 5f15d622d..6c1e530a9 100644 --- a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c +++ b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c @@ -19,5 +19,5 @@ f(int k) } } -/* { dg-final { scan-tree-dump-times "&a" 1 "optimized" { xfail { lp64 || llp64 } } } } */ +/* { dg-final { scan-tree-dump-times "&a" 1 "optimized" } } */ /* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc-4.9/gcc/tree-chrec.c b/gcc-4.9/gcc/tree-chrec.c index b9350f015..3e34c65dc 100644 --- a/gcc-4.9/gcc/tree-chrec.c +++ b/gcc-4.9/gcc/tree-chrec.c @@ -1162,8 +1162,6 @@ nb_vars_in_chrec (tree chrec) } } -static tree chrec_convert_1 (tree, tree, gimple, bool); - /* Converts BASE and STEP of affine scev to TYPE. LOOP is the loop whose iv the scev corresponds to. AT_STMT is the statement at that the scev is evaluated. 
USE_OVERFLOW_SEMANTICS is true if this function should assume that @@ -1238,8 +1236,7 @@ convert_affine_scev (struct loop *loop, tree type, use_overflow_semantics)) return false; - new_base = chrec_convert_1 (type, *base, at_stmt, - use_overflow_semantics); + new_base = chrec_convert (type, *base, at_stmt, use_overflow_semantics); /* The step must be sign extended, regardless of the signedness of CT and TYPE. This only needs to be handled specially when CT is unsigned -- to avoid e.g. unsigned char [100, +, 255] @@ -1250,10 +1247,11 @@ convert_affine_scev (struct loop *loop, tree type, if (TYPE_PRECISION (step_type) > TYPE_PRECISION (ct) && TYPE_UNSIGNED (ct)) { tree signed_ct = build_nonstandard_integer_type (TYPE_PRECISION (ct), 0); - new_step = chrec_convert_1 (signed_ct, new_step, at_stmt, - use_overflow_semantics); + new_step = chrec_convert (signed_ct, new_step, at_stmt, + use_overflow_semantics); } - new_step = chrec_convert_1 (step_type, new_step, at_stmt, use_overflow_semantics); + new_step = chrec_convert (step_type, new_step, at_stmt, + use_overflow_semantics); if (automatically_generated_chrec_p (new_base) || automatically_generated_chrec_p (new_step)) @@ -1283,36 +1281,6 @@ chrec_convert_rhs (tree type, tree chrec, gimple at_stmt) return chrec_convert (type, chrec, at_stmt); } -/* Convert CHREC to TYPE. When the analyzer knows the context in - which the CHREC is built, it sets AT_STMT to the statement that - contains the definition of the analyzed variable, otherwise the - conversion is less accurate: the information is used for - determining a more accurate estimation of the number of iterations. - By default AT_STMT could be safely set to NULL_TREE. - - The following rule is always true: TREE_TYPE (chrec) == - TREE_TYPE (CHREC_LEFT (chrec)) == TREE_TYPE (CHREC_RIGHT (chrec)). 
- An example of what could happen when adding two chrecs and the type - of the CHREC_RIGHT is different than CHREC_LEFT is: - - {(uint) 0, +, (uchar) 10} + - {(uint) 0, +, (uchar) 250} - - that would produce a wrong result if CHREC_RIGHT is not (uint): - - {(uint) 0, +, (uchar) 4} - - instead of - - {(uint) 0, +, (uint) 260} -*/ - -tree -chrec_convert (tree type, tree chrec, gimple at_stmt) -{ - return chrec_convert_1 (type, chrec, at_stmt, true); -} - /* Convert CHREC to TYPE. When the analyzer knows the context in which the CHREC is built, it sets AT_STMT to the statement that contains the definition of the analyzed variable, otherwise the @@ -1404,15 +1372,53 @@ keep_cast: return res; } +/* Convert CHREC to TYPE. When the analyzer knows the context in + which the CHREC is built, it sets AT_STMT to the statement that + contains the definition of the analyzed variable, otherwise the + conversion is less accurate: the information is used for + determining a more accurate estimation of the number of iterations. + By default AT_STMT could be safely set to NULL_TREE. + + The following rule is always true: TREE_TYPE (chrec) == + TREE_TYPE (CHREC_LEFT (chrec)) == TREE_TYPE (CHREC_RIGHT (chrec)). + An example of what could happen when adding two chrecs and the type + of the CHREC_RIGHT is different than CHREC_LEFT is: + + {(uint) 0, +, (uchar) 10} + + {(uint) 0, +, (uchar) 250} + + that would produce a wrong result if CHREC_RIGHT is not (uint): + + {(uint) 0, +, (uchar) 4} + + instead of + + {(uint) 0, +, (uint) 260} + + USE_OVERFLOW_SEMANTICS is true if this function should assume that + the rules for overflow of the given language apply (e.g., that signed + arithmetics in C does not overflow) -- i.e., to use them to avoid unnecessary + tests, but also to enforce that the result follows them. 
*/ + +tree +chrec_convert (tree type, tree chrec, gimple at_stmt, + bool use_overflow_semantics) +{ + return chrec_convert_1 (type, chrec, at_stmt, use_overflow_semantics); +} + /* Convert CHREC to TYPE, without regard to signed overflows. Returns the new chrec if something else than what chrec_convert would do happens, NULL_TREE - otherwise. */ + otherwise. This function set TRUE to variable pointed by FOLD_CONVERSIONS + if the result chrec may overflow. */ tree -chrec_convert_aggressive (tree type, tree chrec) +chrec_convert_aggressive (tree type, tree chrec, bool *fold_conversions) { tree inner_type, left, right, lc, rc, rtype; + gcc_assert (fold_conversions != NULL); + if (automatically_generated_chrec_p (chrec) || TREE_CODE (chrec) != POLYNOMIAL_CHREC) return NULL_TREE; @@ -1421,17 +1427,33 @@ chrec_convert_aggressive (tree type, tree chrec) if (TYPE_PRECISION (type) > TYPE_PRECISION (inner_type)) return NULL_TREE; + if (useless_type_conversion_p (type, inner_type)) + return NULL_TREE; + + if (!*fold_conversions && evolution_function_is_affine_p (chrec)) + { + tree base, step; + struct loop *loop; + + loop = get_chrec_loop (chrec); + base = CHREC_LEFT (chrec); + step = CHREC_RIGHT (chrec); + if (convert_affine_scev (loop, type, &base, &step, NULL, true)) + return build_polynomial_chrec (loop->num, base, step); + } rtype = POINTER_TYPE_P (type) ? 
sizetype : type; left = CHREC_LEFT (chrec); right = CHREC_RIGHT (chrec); - lc = chrec_convert_aggressive (type, left); + lc = chrec_convert_aggressive (type, left, fold_conversions); if (!lc) lc = chrec_convert (type, left, NULL); - rc = chrec_convert_aggressive (rtype, right); + rc = chrec_convert_aggressive (rtype, right, fold_conversions); if (!rc) rc = chrec_convert (rtype, right, NULL); + *fold_conversions = true; + return build_polynomial_chrec (CHREC_VARIABLE (chrec), lc, rc); } diff --git a/gcc-4.9/gcc/tree-chrec.h b/gcc-4.9/gcc/tree-chrec.h index 90cc7a74b..ea4627751 100644 --- a/gcc-4.9/gcc/tree-chrec.h +++ b/gcc-4.9/gcc/tree-chrec.h @@ -59,9 +59,9 @@ enum ev_direction scev_direction (const_tree); extern tree chrec_fold_plus (tree, tree, tree); extern tree chrec_fold_minus (tree, tree, tree); extern tree chrec_fold_multiply (tree, tree, tree); -extern tree chrec_convert (tree, tree, gimple); +extern tree chrec_convert (tree, tree, gimple, bool = true); extern tree chrec_convert_rhs (tree, tree, gimple); -extern tree chrec_convert_aggressive (tree, tree); +extern tree chrec_convert_aggressive (tree, tree, bool *); /* Operations. */ extern tree chrec_apply (unsigned, tree, tree); diff --git a/gcc-4.9/gcc/tree-scalar-evolution.c b/gcc-4.9/gcc/tree-scalar-evolution.c index f1ddc24b1..218baa406 100644 --- a/gcc-4.9/gcc/tree-scalar-evolution.c +++ b/gcc-4.9/gcc/tree-scalar-evolution.c @@ -2116,7 +2116,7 @@ analyze_scalar_evolution_in_loop (struct loop *wrto_loop, struct loop *use_loop, /* We cannot just do tmp = analyze_scalar_evolution (use_loop, version); - ev = resolve_mixers (wrto_loop, tmp); + ev = resolve_mixers (wrto_loop, tmp, folded_casts); as resolve_mixers would query the scalar evolution with respect to wrto_loop. 
For example, in the situation described in the function @@ -2125,9 +2125,9 @@ analyze_scalar_evolution_in_loop (struct loop *wrto_loop, struct loop *use_loop, analyze_scalar_evolution (use_loop, version) = k2 - and resolve_mixers (loop1, k2) finds that the value of k2 in loop 1 - is 100, which is a wrong result, since we are interested in the - value in loop 3. + and resolve_mixers (loop1, k2, folded_casts) finds that the value of + k2 in loop 1 is 100, which is a wrong result, since we are interested + in the value in loop 3. Instead, we need to proceed from use_loop to wrto_loop loop by loop, each time checking that there is no evolution in the inner loop. */ @@ -2137,10 +2137,7 @@ analyze_scalar_evolution_in_loop (struct loop *wrto_loop, struct loop *use_loop, while (1) { tmp = analyze_scalar_evolution (use_loop, ev); - ev = resolve_mixers (use_loop, tmp); - - if (folded_casts && tmp != ev) - *folded_casts = true; + ev = resolve_mixers (use_loop, tmp, folded_casts); if (use_loop == wrto_loop) return ev; @@ -2262,7 +2259,7 @@ loop_closed_phi_def (tree var) } static tree instantiate_scev_r (basic_block, struct loop *, struct loop *, - tree, bool, int); + tree, bool *, int); /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW and EVOLUTION_LOOP, that were left under a symbolic form. @@ -2271,9 +2268,10 @@ static tree instantiate_scev_r (basic_block, struct loop *, struct loop *, CACHE is the cache of already instantiated values. - FOLD_CONVERSIONS should be set to true when the conversions that - may wrap in signed/pointer type are folded, as long as the value of - the chrec is preserved. + Variable pointed by FOLD_CONVERSIONS is set to TRUE when the + conversions that may wrap in signed/pointer type are folded, as long + as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL + then we don't do such fold. SIZE_EXPR is used for computing the size of the expression to be instantiated, and to stop if it exceeds some limit. 
*/ @@ -2282,7 +2280,7 @@ static tree instantiate_scev_name (basic_block instantiate_below, struct loop *evolution_loop, struct loop *inner_loop, tree chrec, - bool fold_conversions, + bool *fold_conversions, int size_expr) { tree res; @@ -2376,9 +2374,10 @@ instantiate_scev_name (basic_block instantiate_below, CACHE is the cache of already instantiated values. - FOLD_CONVERSIONS should be set to true when the conversions that - may wrap in signed/pointer type are folded, as long as the value of - the chrec is preserved. + Variable pointed by FOLD_CONVERSIONS is set to TRUE when the + conversions that may wrap in signed/pointer type are folded, as long + as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL + then we don't do such fold. SIZE_EXPR is used for computing the size of the expression to be instantiated, and to stop if it exceeds some limit. */ @@ -2386,7 +2385,7 @@ instantiate_scev_name (basic_block instantiate_below, static tree instantiate_scev_poly (basic_block instantiate_below, struct loop *evolution_loop, struct loop *, - tree chrec, bool fold_conversions, int size_expr) + tree chrec, bool *fold_conversions, int size_expr) { tree op1; tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, @@ -2420,9 +2419,10 @@ instantiate_scev_poly (basic_block instantiate_below, CACHE is the cache of already instantiated values. - FOLD_CONVERSIONS should be set to true when the conversions that - may wrap in signed/pointer type are folded, as long as the value of - the chrec is preserved. + Variable pointed by FOLD_CONVERSIONS is set to TRUE when the + conversions that may wrap in signed/pointer type are folded, as long + as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL + then we don't do such fold. SIZE_EXPR is used for computing the size of the expression to be instantiated, and to stop if it exceeds some limit. 
*/ @@ -2432,7 +2432,7 @@ instantiate_scev_binary (basic_block instantiate_below, struct loop *evolution_loop, struct loop *inner_loop, tree chrec, enum tree_code code, tree type, tree c0, tree c1, - bool fold_conversions, int size_expr) + bool *fold_conversions, int size_expr) { tree op1; tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop, @@ -2478,9 +2478,10 @@ instantiate_scev_binary (basic_block instantiate_below, CACHE is the cache of already instantiated values. - FOLD_CONVERSIONS should be set to true when the conversions that - may wrap in signed/pointer type are folded, as long as the value of - the chrec is preserved. + Variable pointed by FOLD_CONVERSIONS is set to TRUE when the + conversions that may wrap in signed/pointer type are folded, as long + as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL + then we don't do such fold. SIZE_EXPR is used for computing the size of the expression to be instantiated, and to stop if it exceeds some limit. */ @@ -2488,7 +2489,7 @@ instantiate_scev_binary (basic_block instantiate_below, static tree instantiate_array_ref (basic_block instantiate_below, struct loop *evolution_loop, struct loop *inner_loop, - tree chrec, bool fold_conversions, int size_expr) + tree chrec, bool *fold_conversions, int size_expr) { tree res; tree index = TREE_OPERAND (chrec, 1); @@ -2515,9 +2516,10 @@ instantiate_array_ref (basic_block instantiate_below, CACHE is the cache of already instantiated values. - FOLD_CONVERSIONS should be set to true when the conversions that - may wrap in signed/pointer type are folded, as long as the value of - the chrec is preserved. + Variable pointed by FOLD_CONVERSIONS is set to TRUE when the + conversions that may wrap in signed/pointer type are folded, as long + as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL + then we don't do such fold. 
SIZE_EXPR is used for computing the size of the expression to be instantiated, and to stop if it exceeds some limit. */ @@ -2526,7 +2528,7 @@ static tree instantiate_scev_convert (basic_block instantiate_below, struct loop *evolution_loop, struct loop *inner_loop, tree chrec, tree type, tree op, - bool fold_conversions, int size_expr) + bool *fold_conversions, int size_expr) { tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop, op, @@ -2537,19 +2539,21 @@ instantiate_scev_convert (basic_block instantiate_below, if (fold_conversions) { - tree tmp = chrec_convert_aggressive (type, op0); + tree tmp = chrec_convert_aggressive (type, op0, fold_conversions); if (tmp) return tmp; - } - if (chrec && op0 == op) - return chrec; + /* If we used chrec_convert_aggressive, we can no longer assume that + signed chrecs do not overflow, as chrec_convert does, so avoid + calling it in that case. */ + if (*fold_conversions) + { + if (chrec && op0 == op) + return chrec; - /* If we used chrec_convert_aggressive, we can no longer assume that - signed chrecs do not overflow, as chrec_convert does, so avoid - calling it in that case. */ - if (fold_conversions) - return fold_convert (type, op0); + return fold_convert (type, op0); + } + } return chrec_convert (type, op0, NULL); } @@ -2563,9 +2567,10 @@ instantiate_scev_convert (basic_block instantiate_below, CACHE is the cache of already instantiated values. - FOLD_CONVERSIONS should be set to true when the conversions that - may wrap in signed/pointer type are folded, as long as the value of - the chrec is preserved. + Variable pointed by FOLD_CONVERSIONS is set to TRUE when the + conversions that may wrap in signed/pointer type are folded, as long + as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL + then we don't do such fold. SIZE_EXPR is used for computing the size of the expression to be instantiated, and to stop if it exceeds some limit. 
*/ @@ -2575,7 +2580,7 @@ instantiate_scev_not (basic_block instantiate_below, struct loop *evolution_loop, struct loop *inner_loop, tree chrec, enum tree_code code, tree type, tree op, - bool fold_conversions, int size_expr) + bool *fold_conversions, int size_expr) { tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop, op, @@ -2613,9 +2618,10 @@ instantiate_scev_not (basic_block instantiate_below, CACHE is the cache of already instantiated values. - FOLD_CONVERSIONS should be set to true when the conversions that - may wrap in signed/pointer type are folded, as long as the value of - the chrec is preserved. + Variable pointed by FOLD_CONVERSIONS is set to TRUE when the + conversions that may wrap in signed/pointer type are folded, as long + as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL + then we don't do such fold. SIZE_EXPR is used for computing the size of the expression to be instantiated, and to stop if it exceeds some limit. */ @@ -2624,7 +2630,7 @@ static tree instantiate_scev_3 (basic_block instantiate_below, struct loop *evolution_loop, struct loop *inner_loop, tree chrec, - bool fold_conversions, int size_expr) + bool *fold_conversions, int size_expr) { tree op1, op2; tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, @@ -2661,9 +2667,10 @@ instantiate_scev_3 (basic_block instantiate_below, CACHE is the cache of already instantiated values. - FOLD_CONVERSIONS should be set to true when the conversions that - may wrap in signed/pointer type are folded, as long as the value of - the chrec is preserved. + Variable pointed by FOLD_CONVERSIONS is set to TRUE when the + conversions that may wrap in signed/pointer type are folded, as long + as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL + then we don't do such fold. SIZE_EXPR is used for computing the size of the expression to be instantiated, and to stop if it exceeds some limit. 
*/ @@ -2672,7 +2679,7 @@ static tree instantiate_scev_2 (basic_block instantiate_below, struct loop *evolution_loop, struct loop *inner_loop, tree chrec, - bool fold_conversions, int size_expr) + bool *fold_conversions, int size_expr) { tree op1; tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, @@ -2701,9 +2708,10 @@ instantiate_scev_2 (basic_block instantiate_below, CACHE is the cache of already instantiated values. - FOLD_CONVERSIONS should be set to true when the conversions that - may wrap in signed/pointer type are folded, as long as the value of - the chrec is preserved. + Variable pointed by FOLD_CONVERSIONS is set to TRUE when the + conversions that may wrap in signed/pointer type are folded, as long + as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL + then we don't do such fold. SIZE_EXPR is used for computing the size of the expression to be instantiated, and to stop if it exceeds some limit. */ @@ -2712,7 +2720,7 @@ static tree instantiate_scev_1 (basic_block instantiate_below, struct loop *evolution_loop, struct loop *inner_loop, tree chrec, - bool fold_conversions, int size_expr) + bool *fold_conversions, int size_expr) { tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop, TREE_OPERAND (chrec, 0), @@ -2734,9 +2742,10 @@ instantiate_scev_1 (basic_block instantiate_below, CACHE is the cache of already instantiated values. - FOLD_CONVERSIONS should be set to true when the conversions that - may wrap in signed/pointer type are folded, as long as the value of - the chrec is preserved. + Variable pointed by FOLD_CONVERSIONS is set to TRUE when the + conversions that may wrap in signed/pointer type are folded, as long + as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL + then we don't do such fold. SIZE_EXPR is used for computing the size of the expression to be instantiated, and to stop if it exceeds some limit. 
*/ @@ -2745,7 +2754,7 @@ static tree instantiate_scev_r (basic_block instantiate_below, struct loop *evolution_loop, struct loop *inner_loop, tree chrec, - bool fold_conversions, int size_expr) + bool *fold_conversions, int size_expr) { /* Give up if the expression is larger than the MAX that we allow. */ if (size_expr++ > PARAM_VALUE (PARAM_SCEV_MAX_EXPR_SIZE)) @@ -2870,7 +2879,7 @@ instantiate_scev (basic_block instantiate_below, struct loop *evolution_loop, } res = instantiate_scev_r (instantiate_below, evolution_loop, - NULL, chrec, false, 0); + NULL, chrec, NULL, 0); if (destr) { @@ -2894,9 +2903,10 @@ instantiate_scev (basic_block instantiate_below, struct loop *evolution_loop, of an expression. */ tree -resolve_mixers (struct loop *loop, tree chrec) +resolve_mixers (struct loop *loop, tree chrec, bool *folded_casts) { bool destr = false; + bool fold_conversions = false; if (!global_cache) { global_cache = new instantiate_cache_type; @@ -2904,7 +2914,10 @@ resolve_mixers (struct loop *loop, tree chrec) } tree ret = instantiate_scev_r (block_before_loop (loop), loop, NULL, - chrec, true, 0); + chrec, &fold_conversions, 0); + + if (folded_casts && !*folded_casts) + *folded_casts = fold_conversions; if (destr) { @@ -3369,7 +3382,8 @@ scev_const_prop (void) && !INTEGRAL_TYPE_P (type)) continue; - ev = resolve_mixers (loop, analyze_scalar_evolution (loop, name)); + ev = resolve_mixers (loop, analyze_scalar_evolution (loop, name), + NULL); if (!is_gimple_min_invariant (ev) || !may_propagate_copy (name, ev)) continue; diff --git a/gcc-4.9/gcc/tree-scalar-evolution.h b/gcc-4.9/gcc/tree-scalar-evolution.h index 556997684..cb9af5122 100644 --- a/gcc-4.9/gcc/tree-scalar-evolution.h +++ b/gcc-4.9/gcc/tree-scalar-evolution.h @@ -31,7 +31,7 @@ extern void scev_reset_htab (void); extern void scev_finalize (void); extern tree analyze_scalar_evolution (struct loop *, tree); extern tree instantiate_scev (basic_block, struct loop *, tree); -extern tree resolve_mixers (struct 
loop *, tree); +extern tree resolve_mixers (struct loop *, tree, bool *); extern void gather_stats_on_scev_database (void); extern unsigned int scev_const_prop (void); extern bool expression_expensive_p (tree); diff --git a/gcc-4.9/gcc/tree-ssa-loop-ivopts.c b/gcc-4.9/gcc/tree-ssa-loop-ivopts.c index 400187964..0bf8e2aff 100644 --- a/gcc-4.9/gcc/tree-ssa-loop-ivopts.c +++ b/gcc-4.9/gcc/tree-ssa-loop-ivopts.c @@ -142,9 +142,10 @@ struct iv tree base_object; /* A memory object to that the induction variable points. */ tree step; /* Step of the iv (constant only). */ tree ssa_name; /* The ssa name with the value. */ + unsigned use_id; /* The identifier in the use if it is the case. */ bool biv_p; /* Is it a biv? */ bool have_use_for; /* Do we already have a use for it? */ - unsigned use_id; /* The identifier in the use if it is the case. */ + bool no_overflow; /* True if the iv doesn't overflow. */ }; /* Per-ssa version information (induction variable descriptions, etc.). */ @@ -943,10 +944,10 @@ determine_base_object (tree expr) } /* Allocates an induction variable with given initial value BASE and step STEP - for loop LOOP. */ + for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */ static struct iv * -alloc_iv (tree base, tree step) +alloc_iv (tree base, tree step, bool no_overflow = false) { tree base_object = base; struct iv *iv = XCNEW (struct iv); @@ -977,21 +978,24 @@ alloc_iv (tree base, tree step) iv->have_use_for = false; iv->use_id = 0; iv->ssa_name = NULL_TREE; + iv->no_overflow = no_overflow; return iv; } -/* Sets STEP and BASE for induction variable IV. */ +/* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV + doesn't overflow. 
*/ static void -set_iv (struct ivopts_data *data, tree iv, tree base, tree step) +set_iv (struct ivopts_data *data, tree iv, tree base, tree step, + bool no_overflow) { struct version_info *info = name_info (data, iv); gcc_assert (!info->iv); bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv)); - info->iv = alloc_iv (base, step); + info->iv = alloc_iv (base, step, no_overflow); info->iv->ssa_name = iv; } @@ -1013,37 +1017,19 @@ get_iv (struct ivopts_data *data, tree var) if (!bb || !flow_bb_inside_loop_p (data->current_loop, bb)) - set_iv (data, var, var, build_int_cst (type, 0)); + set_iv (data, var, var, build_int_cst (type, 0), true); } return name_info (data, var)->iv; } -/* Determines the step of a biv defined in PHI. Returns NULL if PHI does - not define a simple affine biv with nonzero step. */ - -static tree -determine_biv_step (gimple phi) -{ - struct loop *loop = gimple_bb (phi)->loop_father; - tree name = PHI_RESULT (phi); - affine_iv iv; - - if (virtual_operand_p (name)) - return NULL_TREE; - - if (!simple_iv (loop, loop, name, &iv, true)) - return NULL_TREE; - - return integer_zerop (iv.step) ? NULL_TREE : iv.step; -} - /* Finds basic ivs. 
*/ static bool find_bivs (struct ivopts_data *data) { gimple phi; + affine_iv iv; tree step, type, base; bool found = false; struct loop *loop = data->current_loop; @@ -1056,10 +1042,16 @@ find_bivs (struct ivopts_data *data) if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi))) continue; - step = determine_biv_step (phi); - if (!step) + if (virtual_operand_p (PHI_RESULT (phi))) continue; + if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true)) + continue; + + if (integer_zerop (iv.step)) + continue; + + step = iv.step; base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); base = expand_simple_operations (base); if (contains_abnormal_ssa_name_p (base) @@ -1076,7 +1068,7 @@ find_bivs (struct ivopts_data *data) step = fold_convert (type, step); } - set_iv (data, PHI_RESULT (phi), base, step); + set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow); found = true; } @@ -1172,7 +1164,7 @@ find_givs_in_stmt (struct ivopts_data *data, gimple stmt) if (!find_givs_in_stmt_scev (data, stmt, &iv)) return; - set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step); + set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow); } /* Finds general ivs in basic block BB. */ @@ -1584,6 +1576,7 @@ idx_find_step (tree base, tree *idx, void *data) { struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data; struct iv *iv; + bool use_overflow_semantics = false; tree step, iv_base, iv_step, lbound, off; struct loop *loop = dta->ivopts_data->current_loop; @@ -1643,9 +1636,12 @@ idx_find_step (tree base, tree *idx, void *data) iv_base = iv->base; iv_step = iv->step; + if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step))) + use_overflow_semantics = true; + if (!convert_affine_scev (dta->ivopts_data->current_loop, sizetype, &iv_base, &iv_step, dta->stmt, - false)) + use_overflow_semantics)) { /* The index might wrap. 
*/ return false; -- cgit v1.2.3 From 9c3e680bacdd0dbc9f6a6976967b49eb0725c3c5 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Date: Tue, 18 Aug 2015 13:41:31 -0500 Subject: PR tree-optimization/48052 commit 05032b10839cf0498c992c819bf2358e86c22bb0 Author: amker Date: Tue Jun 2 10:19:18 2015 +0000 PR tree-optimization/48052 * cfgloop.h (struct control_iv): New. (struct loop): New field control_ivs. * tree-ssa-loop-niter.c : Include "stor-layout.h". (number_of_iterations_lt): Set no_overflow information. (number_of_iterations_exit): Init control iv in niter struct. (record_control_iv): New. (estimate_numbers_of_iterations_loop): Call record_control_iv. (loop_exits_before_overflow): New. Interface factored out of scev_probably_wraps_p. (scev_probably_wraps_p): Factor loop niter related code into loop_exits_before_overflow. (free_numbers_of_iterations_estimates_loop): Free control ivs. * tree-ssa-loop-niter.h (free_loop_control_ivs): New. gcc/testsuite/ChangeLog PR tree-optimization/48052 * gcc.dg/tree-ssa/scev-8.c: New. * gcc.dg/tree-ssa/scev-9.c: New. * gcc.dg/tree-ssa/scev-10.c: New. * gcc.dg/vect/pr48052.c: New. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@224020 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc-4.9/gcc/cfgloop.h | 11 + gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c | 21 ++ gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c | 62 +++++ gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c | 21 ++ gcc-4.9/gcc/testsuite/gcc.dg/vect/pr48052.c | 26 +++ gcc-4.9/gcc/tree-ssa-loop-niter.c | 290 +++++++++++++++++++----- gcc-4.9/gcc/tree-ssa-loop-niter.h | 1 + 7 files changed, 370 insertions(+), 62 deletions(-) create mode 100644 gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c create mode 100644 gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c create mode 100644 gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c create mode 100644 gcc-4.9/gcc/testsuite/gcc.dg/vect/pr48052.c (limited to 'gcc-4.9') diff --git a/gcc-4.9/gcc/cfgloop.h b/gcc-4.9/gcc/cfgloop.h index c7e417bf2..11bfc810f 100644 --- a/gcc-4.9/gcc/cfgloop.h +++ b/gcc-4.9/gcc/cfgloop.h @@ -100,6 +100,14 @@ enum loop_estimation EST_LAST }; +/* The structure describing non-overflow control induction variable for + loop's exit edge. */ +struct GTY ((chain_next ("%h.next"))) control_iv { + tree base; + tree step; + struct control_iv *next; +}; + /* Structure to hold information for each natural loop. */ struct GTY ((chain_next ("%h.next"))) loop { /* Index into loops array. */ @@ -187,6 +195,9 @@ struct GTY ((chain_next ("%h.next"))) loop { /* Upper bound on number of iterations of a loop. */ struct nb_iter_bound *bounds; + /* Non-overflow control ivs of a loop. */ + struct control_iv *control_ivs; + /* Head of the cyclic list of the exits of the loop. 
*/ struct loop_exit *exits; diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c new file mode 100644 index 000000000..2e16c894b --- /dev/null +++ b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ + +int *a; + +int +foo (signed char s, signed char l) +{ + signed char i; + int sum = 0; + + for (i = s; i < l; i++) + { + sum += a[i]; + } + + return sum; +} + +/* Address of array reference is scev. */ +/* { dg-final { scan-tree-dump-times "use \[0-9\]\n address" 1 "ivopts" } } */ diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c new file mode 100644 index 000000000..766f674d5 --- /dev/null +++ b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c @@ -0,0 +1,62 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ + +int *a; + +int +foo1 (long long s, long long l) +{ + long long i; + + for (i = s; i < l; i++) + { + a[(short)i] = 0; + } + return 0; +} + +int +foo2 (unsigned char s, unsigned char l, unsigned char c) +{ + unsigned char i, step = 1; + int sum = 0; + + for (i = s; i < l; i++) + { + sum += a[c]; + c += step; + } + + return sum; +} + +int +foo3 (unsigned char s, unsigned char l, unsigned char c) +{ + unsigned char i; + int sum = 0; + + for (i = s; i != l; i += c) + { + sum += a[i]; + } + + return sum; +} + +int +foo4 (unsigned char s, unsigned char l) +{ + unsigned char i; + int sum = 0; + + for (i = s; i != l; i++) + { + sum += a[i]; + } + + return sum; +} + +/* Address of array references are not scevs. 
*/ +/* { dg-final { scan-tree-dump-not "use \[0-9\]\n address" "ivopts" } } */ diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c new file mode 100644 index 000000000..557e33850 --- /dev/null +++ b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ + +int *a; + +int +foo (unsigned char s, unsigned char l) +{ + unsigned char i; + int sum = 0; + + for (i = s; i < l; i += 1) + { + sum += a[i]; + } + + return sum; +} + +/* Address of array reference is scev. */ +/* { dg-final { scan-tree-dump-times "use \[0-9\]\n address" 1 "ivopts" } } */ diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/vect/pr48052.c b/gcc-4.9/gcc/testsuite/gcc.dg/vect/pr48052.c new file mode 100644 index 000000000..c822ebd41 --- /dev/null +++ b/gcc-4.9/gcc/testsuite/gcc.dg/vect/pr48052.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -std=c99" } */ + +int foo(int* A, int* B, unsigned start, unsigned BS) +{ + int s; + for (unsigned k = start; k < start + BS; k++) + { + s += A[k] * B[k]; + } + + return s; +} + +int bar(int* A, int* B, unsigned BS) +{ + int s; + for (unsigned k = 0; k < BS; k++) + { + s += A[k] * B[k]; + } + + return s; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ diff --git a/gcc-4.9/gcc/tree-ssa-loop-niter.c b/gcc-4.9/gcc/tree-ssa-loop-niter.c index 8fb72b6a5..cb9214fbb 100644 --- a/gcc-4.9/gcc/tree-ssa-loop-niter.c +++ b/gcc-4.9/gcc/tree-ssa-loop-niter.c @@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "stor-layout.h" #include "calls.h" #include "expr.h" #include "tm_p.h" @@ -55,7 +56,7 @@ along with GCC; see the file COPYING3. 
If not see #include "tree-pass.h" #include "stringpool.h" #include "tree-ssanames.h" - +#include "ggc.h" #define SWAP(X, Y) do { affine_iv *tmp = (X); (X) = (Y); (Y) = tmp; } while (0) @@ -1161,6 +1162,7 @@ number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1, iv1->base, iv0->base); niter->niter = delta; niter->max = mpz_get_double_int (niter_type, bnds->up, false); + niter->control.no_overflow = true; return true; } @@ -1938,6 +1940,9 @@ number_of_iterations_exit (struct loop *loop, edge exit, return false; niter->assumptions = boolean_false_node; + niter->control.base = NULL_TREE; + niter->control.step = NULL_TREE; + niter->control.no_overflow = false; stmt = last_stmt (exit->src); if (!stmt || gimple_code (stmt) != GIMPLE_COND) return false; @@ -2714,6 +2719,29 @@ record_estimate (struct loop *loop, tree bound, double_int i_bound, record_niter_bound (loop, i_bound, realistic, upper); } +/* Records the control iv analyzed in NITER for LOOP if the iv is valid + and doesn't overflow. */ + +static void +record_control_iv (struct loop *loop, struct tree_niter_desc *niter) +{ + struct control_iv *iv; + + if (!niter->control.base || !niter->control.step) + return; + + if (!integer_onep (niter->assumptions) || !niter->control.no_overflow) + return; + + iv = ggc_alloc_control_iv (); + iv->base = niter->control.base; + iv->step = niter->control.step; + iv->next = loop->control_ivs; + loop->control_ivs = iv; + + return; +} + /* Record the estimate on number of iterations of LOOP based on the fact that the induction variable BASE + STEP * i evaluated in STMT does not wrap and its values belong to the range . 
REALISTIC is true if the @@ -3452,6 +3480,7 @@ estimate_numbers_of_iterations_loop (struct loop *loop) record_estimate (loop, niter, niter_desc.max, last_stmt (ex->src), true, ex == likely_exit, true); + record_control_iv (loop, &niter_desc); } exits.release (); @@ -3759,6 +3788,189 @@ nowrap_type_p (tree type) return false; } +/* Return true if we can prove LOOP is exited before evolution of induction + variable {BASE, STEP} overflows with respect to its type bound. */ + +static bool +loop_exits_before_overflow (tree base, tree step, + gimple at_stmt, struct loop *loop) +{ + double_int niter; + struct control_iv *civ; + struct nb_iter_bound *bound; + tree e, delta, step_abs, unsigned_base; + tree type = TREE_TYPE (step); + tree unsigned_type, valid_niter; + + /* Don't issue signed overflow warnings. */ + fold_defer_overflow_warnings (); + + /* Compute the number of iterations before we reach the bound of the + type, and verify that the loop is exited before this occurs. */ + unsigned_type = unsigned_type_for (type); + unsigned_base = fold_convert (unsigned_type, base); + + if (tree_int_cst_sign_bit (step)) + { + tree extreme = fold_convert (unsigned_type, + lower_bound_in_type (type, type)); + delta = fold_build2 (MINUS_EXPR, unsigned_type, unsigned_base, extreme); + step_abs = fold_build1 (NEGATE_EXPR, unsigned_type, + fold_convert (unsigned_type, step)); + } + else + { + tree extreme = fold_convert (unsigned_type, + upper_bound_in_type (type, type)); + delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, unsigned_base); + step_abs = fold_convert (unsigned_type, step); + } + + valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs); + + estimate_numbers_of_iterations_loop (loop); + + if (max_loop_iterations (loop, &niter) + && double_int_fits_to_tree_p (TREE_TYPE (valid_niter), niter) + && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter, + double_int_to_tree (TREE_TYPE (valid_niter), + niter))) != NULL + && integer_nonzerop (e))
+ { + fold_undefer_and_ignore_overflow_warnings (); + return true; + } + if (at_stmt) + for (bound = loop->bounds; bound; bound = bound->next) + { + if (n_of_executions_at_most (at_stmt, bound, valid_niter)) + { + fold_undefer_and_ignore_overflow_warnings (); + return true; + } + } + fold_undefer_and_ignore_overflow_warnings (); + + /* Try to prove loop is exited before {base, step} overflows with the + help of analyzed loop control IV. This is done only for IVs with + constant step because otherwise we don't have the information. */ + if (TREE_CODE (step) == INTEGER_CST) + for (civ = loop->control_ivs; civ; civ = civ->next) + { + enum tree_code code; + tree stepped, extreme, civ_type = TREE_TYPE (civ->step); + + /* Have to consider type difference because operand_equal_p ignores + that for constants. */ + if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (civ_type) + || element_precision (type) != element_precision (civ_type)) + continue; + + /* Only consider control IV with same step. */ + if (!operand_equal_p (step, civ->step, 0)) + continue; + + /* Done proving if this is a no-overflow control IV. */ + if (operand_equal_p (base, civ->base, 0)) + return true; + + /* If this is a before stepping control IV, in other words, we have + + {civ_base, step} = {base + step, step} + + Because civ {base + step, step} doesn't overflow during loop + iterations, {base, step} will not overflow if we can prove the + operation "base + step" does not overflow. Specifically, we try + to prove below conditions are satisfied: + + base <= UPPER_BOUND (type) - step ;;step > 0 + base >= LOWER_BOUND (type) - step ;;step < 0 + + by proving the reverse conditions are false using loop's initial + condition. 
*/ + stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step); + if (operand_equal_p (stepped, civ->base, 0)) + { + if (tree_int_cst_sign_bit (step)) + { + code = LT_EXPR; + extreme = lower_bound_in_type (type, type); + } + else + { + code = GT_EXPR; + extreme = upper_bound_in_type (type, type); + } + extreme = fold_build2 (MINUS_EXPR, type, extreme, step); + e = fold_build2 (code, boolean_type_node, base, extreme); + e = simplify_using_initial_conditions (loop, e); + if (integer_zerop (e)) + return true; + + continue; + } + + /* Similar to above, only in this case we have: + + {civ_base, step} = {(signed T)((unsigned T)base + step), step} + && TREE_TYPE (civ_base) = signed T. + + We prove that below condition is satisfied: + + (signed T)((unsigned T)base + step) + == (signed T)(unsigned T)base + step + == base + step + + because of exactly the same reason as above. This also proves + there is no overflow in the operation "base + step", thus the + induction variable {base, step} during loop iterations. + + This is necessary to handle cases as below: + + int foo (int *a, signed char s, signed char l) + { + signed char i; + for (i = s; i < l; i++) + a[i] = 0; + return 0; + } + + The variable I is firstly converted to type unsigned char, + incremented, then converted back to type signed char.
*/ + if (!CONVERT_EXPR_P (civ->base) || TREE_TYPE (civ->base) != type) + continue; + e = TREE_OPERAND (civ->base, 0); + if (TREE_CODE (e) != PLUS_EXPR + || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST + || !operand_equal_p (step, + fold_convert (type, + TREE_OPERAND (e, 1)), 0)) + continue; + e = TREE_OPERAND (e, 0); + if (!CONVERT_EXPR_P (e) || !operand_equal_p (e, unsigned_base, 0)) + continue; + e = TREE_OPERAND (e, 0); + gcc_assert (operand_equal_p (e, base, 0)); + if (tree_int_cst_sign_bit (step)) + { + code = LT_EXPR; + extreme = lower_bound_in_type (type, type); + } + else + { + code = GT_EXPR; + extreme = upper_bound_in_type (type, type); + } + extreme = fold_build2 (MINUS_EXPR, type, extreme, step); + e = fold_build2 (code, boolean_type_node, base, extreme); + e = simplify_using_initial_conditions (loop, e); + if (integer_zerop (e)) + return true; + } + + return false; +} + /* Return false only when the induction variable BASE + STEP * I is known to not overflow: i.e. when the number of iterations is small enough with respect to the step and initial condition in order to @@ -3774,13 +3986,6 @@ scev_probably_wraps_p (tree base, tree step, gimple at_stmt, struct loop *loop, bool use_overflow_semantics) { - tree delta, step_abs; - tree unsigned_type, valid_niter; - tree type = TREE_TYPE (step); - tree e; - double_int niter; - struct nb_iter_bound *bound; - /* FIXME: We really need something like http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html. @@ -3814,56 +4019,8 @@ scev_probably_wraps_p (tree base, tree step, if (TREE_CODE (step) != INTEGER_CST) return true; - /* Don't issue signed overflow warnings. */ - fold_defer_overflow_warnings (); - - /* Otherwise, compute the number of iterations before we reach the - bound of the type, and verify that the loop is exited before this - occurs. 
*/ - unsigned_type = unsigned_type_for (type); - base = fold_convert (unsigned_type, base); - - if (tree_int_cst_sign_bit (step)) - { - tree extreme = fold_convert (unsigned_type, - lower_bound_in_type (type, type)); - delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme); - step_abs = fold_build1 (NEGATE_EXPR, unsigned_type, - fold_convert (unsigned_type, step)); - } - else - { - tree extreme = fold_convert (unsigned_type, - upper_bound_in_type (type, type)); - delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base); - step_abs = fold_convert (unsigned_type, step); - } - - valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs); - - estimate_numbers_of_iterations_loop (loop); - - if (max_loop_iterations (loop, &niter) - && double_int_fits_to_tree_p (TREE_TYPE (valid_niter), niter) - && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter, - double_int_to_tree (TREE_TYPE (valid_niter), - niter))) != NULL - && integer_nonzerop (e)) - { - fold_undefer_and_ignore_overflow_warnings (); - return false; - } - if (at_stmt) - for (bound = loop->bounds; bound; bound = bound->next) - { - if (n_of_executions_at_most (at_stmt, bound, valid_niter)) - { - fold_undefer_and_ignore_overflow_warnings (); - return false; - } - } - - fold_undefer_and_ignore_overflow_warnings (); + if (loop_exits_before_overflow (base, step, at_stmt, loop)) + return false; /* At this point we still don't have a proof that the iv does not overflow: give up. 
*/ @@ -3875,17 +4032,26 @@ scev_probably_wraps_p (tree base, tree step, void free_numbers_of_iterations_estimates_loop (struct loop *loop) { - struct nb_iter_bound *bound, *next; + struct control_iv *civ; + struct nb_iter_bound *bound; loop->nb_iterations = NULL; loop->estimate_state = EST_NOT_COMPUTED; - for (bound = loop->bounds; bound; bound = next) + for (bound = loop->bounds; bound;) { - next = bound->next; + struct nb_iter_bound *next = bound->next; ggc_free (bound); + bound = next; } - loop->bounds = NULL; + + for (civ = loop->control_ivs; civ;) + { + struct control_iv *next = civ->next; + ggc_free (civ); + civ = next; + } + loop->control_ivs = NULL; } /* Frees the information on upper bounds on numbers of iterations of loops. */ diff --git a/gcc-4.9/gcc/tree-ssa-loop-niter.h b/gcc-4.9/gcc/tree-ssa-loop-niter.h index df0d64d02..dd2535851 100644 --- a/gcc-4.9/gcc/tree-ssa-loop-niter.h +++ b/gcc-4.9/gcc/tree-ssa-loop-niter.h @@ -41,6 +41,7 @@ extern void estimate_numbers_of_iterations (void); extern bool stmt_dominates_stmt_p (gimple, gimple); extern bool nowrap_type_p (tree); extern bool scev_probably_wraps_p (tree, tree, gimple, struct loop *, bool); +extern void free_loop_control_ivs (struct loop *); extern void free_numbers_of_iterations_estimates_loop (struct loop *); extern void free_numbers_of_iterations_estimates (void); extern void substitute_in_loop_info (struct loop *, tree, tree); -- cgit v1.2.3