Diffstat (limited to 'gcc-4.9')
-rw-r--r--  gcc-4.9/gcc/cfgloop.h                            |  11
-rw-r--r--  gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c  |  54
-rw-r--r--  gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c  |  21
-rw-r--r--  gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c   |   2
-rw-r--r--  gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c   |   2
-rw-r--r--  gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c   |  62
-rw-r--r--  gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c   |  21
-rw-r--r--  gcc-4.9/gcc/testsuite/gcc.dg/vect/pr48052.c      |  26
-rw-r--r--  gcc-4.9/gcc/tree-chrec.c                         | 104
-rw-r--r--  gcc-4.9/gcc/tree-chrec.h                         |   4
-rw-r--r--  gcc-4.9/gcc/tree-scalar-evolution.c              | 138
-rw-r--r--  gcc-4.9/gcc/tree-scalar-evolution.h              |   2
-rw-r--r--  gcc-4.9/gcc/tree-ssa-loop-ivopts.c               | 436
-rw-r--r--  gcc-4.9/gcc/tree-ssa-loop-niter.c                | 290
-rw-r--r--  gcc-4.9/gcc/tree-ssa-loop-niter.h                |   1
15 files changed, 953 insertions(+), 221 deletions(-)
diff --git a/gcc-4.9/gcc/cfgloop.h b/gcc-4.9/gcc/cfgloop.h
index c7e417bf2..11bfc810f 100644
--- a/gcc-4.9/gcc/cfgloop.h
+++ b/gcc-4.9/gcc/cfgloop.h
@@ -100,6 +100,14 @@ enum loop_estimation
EST_LAST
};
+/* The structure describing a non-overflow control induction variable
+ for a loop's exit edge. */
+struct GTY ((chain_next ("%h.next"))) control_iv {
+ tree base;
+ tree step;
+ struct control_iv *next;
+};
+
/* Structure to hold information for each natural loop. */
struct GTY ((chain_next ("%h.next"))) loop {
/* Index into loops array. */
@@ -187,6 +195,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
/* Upper bound on number of iterations of a loop. */
struct nb_iter_bound *bounds;
+ /* Non-overflow control ivs of a loop. */
+ struct control_iv *control_ivs;
+
/* Head of the cyclic list of the exits of the loop. */
struct loop_exit *exits;
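
The new field is a simple GC-managed singly linked list. A minimal sketch of consuming it (a hypothetical dump helper, not part of this patch; print_generic_expr and TDF_SLIM are the usual GCC 4.9 pretty-printing entry points):

  /* Hypothetical helper: dump every recorded non-overflow control IV
     of LOOP.  Uses only the fields declared above.  */
  static void
  dump_control_ivs (FILE *file, struct loop *loop)
  {
    struct control_iv *civ;

    for (civ = loop->control_ivs; civ; civ = civ->next)
      {
        fprintf (file, "loop %d control iv: base = ", loop->num);
        print_generic_expr (file, civ->base, TDF_SLIM);
        fprintf (file, ", step = ");
        print_generic_expr (file, civ->step, TDF_SLIM);
        fprintf (file, "\n");
      }
  }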
diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c
new file mode 100644
index 000000000..c5bddbfba
--- /dev/null
+++ b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+void foo (double *p)
+{
+ int i;
+ for (i = -20000; i < 200000; i+= 40)
+ {
+ p[i+0] = 1.0;
+ p[i+1] = 1.0;
+ p[i+2] = 1.0;
+ p[i+3] = 1.0;
+ p[i+4] = 1.0;
+ p[i+5] = 1.0;
+ p[i+6] = 1.0;
+ p[i+7] = 1.0;
+ p[i+8] = 1.0;
+ p[i+9] = 1.0;
+ p[i+10] = 1.0;
+ p[i+11] = 1.0;
+ p[i+12] = 1.0;
+ p[i+13] = 1.0;
+ p[i+14] = 1.0;
+ p[i+15] = 1.0;
+ p[i+16] = 1.0;
+ p[i+17] = 1.0;
+ p[i+18] = 1.0;
+ p[i+19] = 1.0;
+ p[i+20] = 1.0;
+ p[i+21] = 1.0;
+ p[i+22] = 1.0;
+ p[i+23] = 1.0;
+ p[i+24] = 1.0;
+ p[i+25] = 1.0;
+ p[i+26] = 1.0;
+ p[i+27] = 1.0;
+ p[i+28] = 1.0;
+ p[i+29] = 1.0;
+ p[i+30] = 1.0;
+ p[i+31] = 1.0;
+ p[i+32] = 1.0;
+ p[i+33] = 1.0;
+ p[i+34] = 1.0;
+ p[i+35] = 1.0;
+ p[i+36] = 1.0;
+ p[i+37] = 1.0;
+ p[i+38] = 1.0;
+ p[i+39] = 1.0;
+ }
+}
+
+/* We should group address type IV uses. */
+/* { dg-final { scan-tree-dump-not "\\nuse 2\\n" "ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
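
The scan above keys off the "use N" / "use N.M" dump format introduced by this patch's dump_use change (see the tree-ssa-loop-ivopts.c hunk below). An illustrative shape of the expected details dump, not verbatim compiler output: all forty stores share one address group, so no "use 2" line appears:

  use 0
    address
  use 0.1
    address
  ...
  use 1
    compare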
diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c
new file mode 100644
index 000000000..2e16c894b
--- /dev/null
+++ b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (signed char s, signed char l)
+{
+ signed char i;
+ int sum = 0;
+
+ for (i = s; i < l; i++)
+ {
+ sum += a[i];
+ }
+
+ return sum;
+}
+
+/* The address of the array reference is a scev. */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n address" 1 "ivopts" } } */
diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
index 5cac1cefb..28d5c9327 100644
--- a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
+++ b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
@@ -14,5 +14,5 @@ f(int k)
}
}
-/* { dg-final { scan-tree-dump-times "&a" 1 "optimized" { xfail { lp64 || llp64 } } } } */
+/* { dg-final { scan-tree-dump-times "&a" 1 "optimized" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
index 5f15d622d..6c1e530a9 100644
--- a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
+++ b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
@@ -19,5 +19,5 @@ f(int k)
}
}
-/* { dg-final { scan-tree-dump-times "&a" 1 "optimized" { xfail { lp64 || llp64 } } } } */
+/* { dg-final { scan-tree-dump-times "&a" 1 "optimized" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c
new file mode 100644
index 000000000..766f674d5
--- /dev/null
+++ b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c
@@ -0,0 +1,62 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo1 (long long s, long long l)
+{
+ long long i;
+
+ for (i = s; i < l; i++)
+ {
+ a[(short)i] = 0;
+ }
+ return 0;
+}
+
+int
+foo2 (unsigned char s, unsigned char l, unsigned char c)
+{
+ unsigned char i, step = 1;
+ int sum = 0;
+
+ for (i = s; i < l; i++)
+ {
+ sum += a[c];
+ c += step;
+ }
+
+ return sum;
+}
+
+int
+foo3 (unsigned char s, unsigned char l, unsigned char c)
+{
+ unsigned char i;
+ int sum = 0;
+
+ for (i = s; i != l; i += c)
+ {
+ sum += a[i];
+ }
+
+ return sum;
+}
+
+int
+foo4 (unsigned char s, unsigned char l)
+{
+ unsigned char i;
+ int sum = 0;
+
+ for (i = s; i != l; i++)
+ {
+ sum += a[i];
+ }
+
+ return sum;
+}
+
+/* Addresses of the array references are not scevs. */
+/* { dg-final { scan-tree-dump-not "use \[0-9\]\n address" "ivopts" } } */
diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c
new file mode 100644
index 000000000..557e33850
--- /dev/null
+++ b/gcc-4.9/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (unsigned char s, unsigned char l)
+{
+ unsigned char i;
+ int sum = 0;
+
+ for (i = s; i < l; i += 1)
+ {
+ sum += a[i];
+ }
+
+ return sum;
+}
+
+/* The address of the array reference is a scev. */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n address" 1 "ivopts" } } */
diff --git a/gcc-4.9/gcc/testsuite/gcc.dg/vect/pr48052.c b/gcc-4.9/gcc/testsuite/gcc.dg/vect/pr48052.c
new file mode 100644
index 000000000..c822ebd41
--- /dev/null
+++ b/gcc-4.9/gcc/testsuite/gcc.dg/vect/pr48052.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -std=c99" } */
+
+int foo(int* A, int* B, unsigned start, unsigned BS)
+{
+ int s;
+ for (unsigned k = start; k < start + BS; k++)
+ {
+ s += A[k] * B[k];
+ }
+
+ return s;
+}
+
+int bar(int* A, int* B, unsigned BS)
+{
+ int s;
+ for (unsigned k = 0; k < BS; k++)
+ {
+ s += A[k] * B[k];
+ }
+
+ return s;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
diff --git a/gcc-4.9/gcc/tree-chrec.c b/gcc-4.9/gcc/tree-chrec.c
index b9350f015..3e34c65dc 100644
--- a/gcc-4.9/gcc/tree-chrec.c
+++ b/gcc-4.9/gcc/tree-chrec.c
@@ -1162,8 +1162,6 @@ nb_vars_in_chrec (tree chrec)
}
}
-static tree chrec_convert_1 (tree, tree, gimple, bool);
-
/* Converts BASE and STEP of affine scev to TYPE. LOOP is the loop whose iv
the scev corresponds to. AT_STMT is the statement at which the scev is
evaluated. USE_OVERFLOW_SEMANTICS is true if this function should assume that
@@ -1238,8 +1236,7 @@ convert_affine_scev (struct loop *loop, tree type,
use_overflow_semantics))
return false;
- new_base = chrec_convert_1 (type, *base, at_stmt,
- use_overflow_semantics);
+ new_base = chrec_convert (type, *base, at_stmt, use_overflow_semantics);
/* The step must be sign extended, regardless of the signedness
of CT and TYPE. This only needs to be handled specially when
CT is unsigned -- to avoid e.g. unsigned char [100, +, 255]
@@ -1250,10 +1247,11 @@ convert_affine_scev (struct loop *loop, tree type,
if (TYPE_PRECISION (step_type) > TYPE_PRECISION (ct) && TYPE_UNSIGNED (ct))
{
tree signed_ct = build_nonstandard_integer_type (TYPE_PRECISION (ct), 0);
- new_step = chrec_convert_1 (signed_ct, new_step, at_stmt,
- use_overflow_semantics);
+ new_step = chrec_convert (signed_ct, new_step, at_stmt,
+ use_overflow_semantics);
}
- new_step = chrec_convert_1 (step_type, new_step, at_stmt, use_overflow_semantics);
+ new_step = chrec_convert (step_type, new_step, at_stmt,
+ use_overflow_semantics);
if (automatically_generated_chrec_p (new_base)
|| automatically_generated_chrec_p (new_step))
@@ -1290,36 +1288,6 @@ chrec_convert_rhs (tree type, tree chrec, gimple at_stmt)
determining a more accurate estimation of the number of iterations.
By default AT_STMT could be safely set to NULL_TREE.
- The following rule is always true: TREE_TYPE (chrec) ==
- TREE_TYPE (CHREC_LEFT (chrec)) == TREE_TYPE (CHREC_RIGHT (chrec)).
- An example of what could happen when adding two chrecs and the type
- of the CHREC_RIGHT is different than CHREC_LEFT is:
-
- {(uint) 0, +, (uchar) 10} +
- {(uint) 0, +, (uchar) 250}
-
- that would produce a wrong result if CHREC_RIGHT is not (uint):
-
- {(uint) 0, +, (uchar) 4}
-
- instead of
-
- {(uint) 0, +, (uint) 260}
-*/
-
-tree
-chrec_convert (tree type, tree chrec, gimple at_stmt)
-{
- return chrec_convert_1 (type, chrec, at_stmt, true);
-}
-
-/* Convert CHREC to TYPE. When the analyzer knows the context in
- which the CHREC is built, it sets AT_STMT to the statement that
- contains the definition of the analyzed variable, otherwise the
- conversion is less accurate: the information is used for
- determining a more accurate estimation of the number of iterations.
- By default AT_STMT could be safely set to NULL_TREE.
-
USE_OVERFLOW_SEMANTICS is true if this function should assume that
the rules for overflow of the given language apply (e.g., that signed
arithmetic in C does not overflow) -- i.e., to use them to avoid unnecessary
@@ -1404,15 +1372,53 @@ keep_cast:
return res;
}
+/* Convert CHREC to TYPE. When the analyzer knows the context in
+ which the CHREC is built, it sets AT_STMT to the statement that
+ contains the definition of the analyzed variable, otherwise the
+ conversion is less accurate: the information is used for
+ determining a more accurate estimation of the number of iterations.
+ By default AT_STMT could be safely set to NULL_TREE.
+
+ The following rule is always true: TREE_TYPE (chrec) ==
+ TREE_TYPE (CHREC_LEFT (chrec)) == TREE_TYPE (CHREC_RIGHT (chrec)).
+ An example of what could happen when adding two chrecs and the type
+ of the CHREC_RIGHT is different than CHREC_LEFT is:
+
+ {(uint) 0, +, (uchar) 10} +
+ {(uint) 0, +, (uchar) 250}
+
+ that would produce a wrong result if CHREC_RIGHT is not (uint):
+
+ {(uint) 0, +, (uchar) 4}
+
+ instead of
+
+ {(uint) 0, +, (uint) 260}
+
+ USE_OVERFLOW_SEMANTICS is true if this function should assume that
+ the rules for overflow of the given language apply (e.g., that signed
+ arithmetic in C does not overflow) -- i.e., to use them to avoid unnecessary
+ tests, but also to enforce that the result follows them. */
+
+tree
+chrec_convert (tree type, tree chrec, gimple at_stmt,
+ bool use_overflow_semantics)
+{
+ return chrec_convert_1 (type, chrec, at_stmt, use_overflow_semantics);
+}
+
/* Convert CHREC to TYPE, without regard to signed overflows. Returns the new
chrec if something else than what chrec_convert would do happens, NULL_TREE
- otherwise. */
+ otherwise. This function sets the variable pointed to by FOLD_CONVERSIONS
+ to TRUE if the resulting chrec may overflow. */
tree
-chrec_convert_aggressive (tree type, tree chrec)
+chrec_convert_aggressive (tree type, tree chrec, bool *fold_conversions)
{
tree inner_type, left, right, lc, rc, rtype;
+ gcc_assert (fold_conversions != NULL);
+
if (automatically_generated_chrec_p (chrec)
|| TREE_CODE (chrec) != POLYNOMIAL_CHREC)
return NULL_TREE;
@@ -1421,17 +1427,33 @@ chrec_convert_aggressive (tree type, tree chrec)
if (TYPE_PRECISION (type) > TYPE_PRECISION (inner_type))
return NULL_TREE;
+ if (useless_type_conversion_p (type, inner_type))
+ return NULL_TREE;
+
+ if (!*fold_conversions && evolution_function_is_affine_p (chrec))
+ {
+ tree base, step;
+ struct loop *loop;
+
+ loop = get_chrec_loop (chrec);
+ base = CHREC_LEFT (chrec);
+ step = CHREC_RIGHT (chrec);
+ if (convert_affine_scev (loop, type, &base, &step, NULL, true))
+ return build_polynomial_chrec (loop->num, base, step);
+ }
rtype = POINTER_TYPE_P (type) ? sizetype : type;
left = CHREC_LEFT (chrec);
right = CHREC_RIGHT (chrec);
- lc = chrec_convert_aggressive (type, left);
+ lc = chrec_convert_aggressive (type, left, fold_conversions);
if (!lc)
lc = chrec_convert (type, left, NULL);
- rc = chrec_convert_aggressive (rtype, right);
+ rc = chrec_convert_aggressive (rtype, right, fold_conversions);
if (!rc)
rc = chrec_convert (rtype, right, NULL);
+ *fold_conversions = true;
+
return build_polynomial_chrec (CHREC_VARIABLE (chrec), lc, rc);
}
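
A simplified sketch of the new contract, mirroring how instantiate_scev_convert in tree-scalar-evolution.c (below) consumes the out-parameter; this is an illustration of the calling pattern, not code from the patch:

  /* Illustrative-only helper showing the new contract.  */
  static tree
  convert_during_instantiation (tree type, tree op0)
  {
    bool folded = false;

    /* Try the aggressive conversion first; FOLDED is set (and stays
       set) once a may-wrap conversion has been folded.  */
    tree tmp = chrec_convert_aggressive (type, op0, &folded);
    if (tmp)
      return tmp;

    /* After any aggressive fold, chrec_convert's no-overflow
       assumptions no longer hold, so use a plain fold_convert.  */
    if (folded)
      return fold_convert (type, op0);

    return chrec_convert (type, op0, NULL);
  }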
diff --git a/gcc-4.9/gcc/tree-chrec.h b/gcc-4.9/gcc/tree-chrec.h
index 90cc7a74b..ea4627751 100644
--- a/gcc-4.9/gcc/tree-chrec.h
+++ b/gcc-4.9/gcc/tree-chrec.h
@@ -59,9 +59,9 @@ enum ev_direction scev_direction (const_tree);
extern tree chrec_fold_plus (tree, tree, tree);
extern tree chrec_fold_minus (tree, tree, tree);
extern tree chrec_fold_multiply (tree, tree, tree);
-extern tree chrec_convert (tree, tree, gimple);
+extern tree chrec_convert (tree, tree, gimple, bool = true);
extern tree chrec_convert_rhs (tree, tree, gimple);
-extern tree chrec_convert_aggressive (tree, tree);
+extern tree chrec_convert_aggressive (tree, tree, bool *);
/* Operations. */
extern tree chrec_apply (unsigned, tree, tree);
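
Since gcc-4.9 is compiled as C++, the new parameter can be defaulted, so existing three-argument callers keep their current meaning. A sketch of the two forms (illustrative fragment, assuming tree TYPE/CHREC and gimple AT_STMT in scope):

  chrec_convert (type, chrec, at_stmt);        /* As before: may assume
                                                  no undefined overflow. */
  chrec_convert (type, chrec, at_stmt, false); /* New: make no overflow
                                                  assumptions. */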
diff --git a/gcc-4.9/gcc/tree-scalar-evolution.c b/gcc-4.9/gcc/tree-scalar-evolution.c
index f1ddc24b1..218baa406 100644
--- a/gcc-4.9/gcc/tree-scalar-evolution.c
+++ b/gcc-4.9/gcc/tree-scalar-evolution.c
@@ -2116,7 +2116,7 @@ analyze_scalar_evolution_in_loop (struct loop *wrto_loop, struct loop *use_loop,
/* We cannot just do
tmp = analyze_scalar_evolution (use_loop, version);
- ev = resolve_mixers (wrto_loop, tmp);
+ ev = resolve_mixers (wrto_loop, tmp, folded_casts);
as resolve_mixers would query the scalar evolution with respect to
wrto_loop. For example, in the situation described in the function
@@ -2125,9 +2125,9 @@ analyze_scalar_evolution_in_loop (struct loop *wrto_loop, struct loop *use_loop,
analyze_scalar_evolution (use_loop, version) = k2
- and resolve_mixers (loop1, k2) finds that the value of k2 in loop 1
- is 100, which is a wrong result, since we are interested in the
- value in loop 3.
+ and resolve_mixers (loop1, k2, folded_casts) finds that the value of
+ k2 in loop 1 is 100, which is a wrong result, since we are interested
+ in the value in loop 3.
Instead, we need to proceed from use_loop to wrto_loop loop by loop,
each time checking that there is no evolution in the inner loop. */
@@ -2137,10 +2137,7 @@ analyze_scalar_evolution_in_loop (struct loop *wrto_loop, struct loop *use_loop,
while (1)
{
tmp = analyze_scalar_evolution (use_loop, ev);
- ev = resolve_mixers (use_loop, tmp);
-
- if (folded_casts && tmp != ev)
- *folded_casts = true;
+ ev = resolve_mixers (use_loop, tmp, folded_casts);
if (use_loop == wrto_loop)
return ev;
@@ -2262,7 +2259,7 @@ loop_closed_phi_def (tree var)
}
static tree instantiate_scev_r (basic_block, struct loop *, struct loop *,
- tree, bool, int);
+ tree, bool *, int);
/* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
and EVOLUTION_LOOP, that were left under a symbolic form.
@@ -2271,9 +2268,10 @@ static tree instantiate_scev_r (basic_block, struct loop *, struct loop *,
CACHE is the cache of already instantiated values.
- FOLD_CONVERSIONS should be set to true when the conversions that
- may wrap in signed/pointer type are folded, as long as the value of
- the chrec is preserved.
+ The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+ conversions that may wrap in signed/pointer types are folded, as long
+ as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL,
+ no such folding is done.
SIZE_EXPR is used for computing the size of the expression to be
instantiated, and to stop if it exceeds some limit. */
@@ -2282,7 +2280,7 @@ static tree
instantiate_scev_name (basic_block instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
tree chrec,
- bool fold_conversions,
+ bool *fold_conversions,
int size_expr)
{
tree res;
@@ -2376,9 +2374,10 @@ instantiate_scev_name (basic_block instantiate_below,
CACHE is the cache of already instantiated values.
- FOLD_CONVERSIONS should be set to true when the conversions that
- may wrap in signed/pointer type are folded, as long as the value of
- the chrec is preserved.
+ The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+ conversions that may wrap in signed/pointer types are folded, as long
+ as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL,
+ no such folding is done.
SIZE_EXPR is used for computing the size of the expression to be
instantiated, and to stop if it exceeds some limit. */
@@ -2386,7 +2385,7 @@ instantiate_scev_name (basic_block instantiate_below,
static tree
instantiate_scev_poly (basic_block instantiate_below,
struct loop *evolution_loop, struct loop *,
- tree chrec, bool fold_conversions, int size_expr)
+ tree chrec, bool *fold_conversions, int size_expr)
{
tree op1;
tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
@@ -2420,9 +2419,10 @@ instantiate_scev_poly (basic_block instantiate_below,
CACHE is the cache of already instantiated values.
- FOLD_CONVERSIONS should be set to true when the conversions that
- may wrap in signed/pointer type are folded, as long as the value of
- the chrec is preserved.
+ The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+ conversions that may wrap in signed/pointer types are folded, as long
+ as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL,
+ no such folding is done.
SIZE_EXPR is used for computing the size of the expression to be
instantiated, and to stop if it exceeds some limit. */
@@ -2432,7 +2432,7 @@ instantiate_scev_binary (basic_block instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
tree chrec, enum tree_code code,
tree type, tree c0, tree c1,
- bool fold_conversions, int size_expr)
+ bool *fold_conversions, int size_expr)
{
tree op1;
tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop,
@@ -2478,9 +2478,10 @@ instantiate_scev_binary (basic_block instantiate_below,
CACHE is the cache of already instantiated values.
- FOLD_CONVERSIONS should be set to true when the conversions that
- may wrap in signed/pointer type are folded, as long as the value of
- the chrec is preserved.
+ The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+ conversions that may wrap in signed/pointer types are folded, as long
+ as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL,
+ no such folding is done.
SIZE_EXPR is used for computing the size of the expression to be
instantiated, and to stop if it exceeds some limit. */
@@ -2488,7 +2489,7 @@ instantiate_scev_binary (basic_block instantiate_below,
static tree
instantiate_array_ref (basic_block instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
- tree chrec, bool fold_conversions, int size_expr)
+ tree chrec, bool *fold_conversions, int size_expr)
{
tree res;
tree index = TREE_OPERAND (chrec, 1);
@@ -2515,9 +2516,10 @@ instantiate_array_ref (basic_block instantiate_below,
CACHE is the cache of already instantiated values.
- FOLD_CONVERSIONS should be set to true when the conversions that
- may wrap in signed/pointer type are folded, as long as the value of
- the chrec is preserved.
+ The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+ conversions that may wrap in signed/pointer types are folded, as long
+ as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL,
+ no such folding is done.
SIZE_EXPR is used for computing the size of the expression to be
instantiated, and to stop if it exceeds some limit. */
@@ -2526,7 +2528,7 @@ static tree
instantiate_scev_convert (basic_block instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
tree chrec, tree type, tree op,
- bool fold_conversions, int size_expr)
+ bool *fold_conversions, int size_expr)
{
tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
inner_loop, op,
@@ -2537,19 +2539,21 @@ instantiate_scev_convert (basic_block instantiate_below,
if (fold_conversions)
{
- tree tmp = chrec_convert_aggressive (type, op0);
+ tree tmp = chrec_convert_aggressive (type, op0, fold_conversions);
if (tmp)
return tmp;
- }
- if (chrec && op0 == op)
- return chrec;
+ /* If we used chrec_convert_aggressive, we can no longer assume that
+ signed chrecs do not overflow, as chrec_convert does, so avoid
+ calling it in that case. */
+ if (*fold_conversions)
+ {
+ if (chrec && op0 == op)
+ return chrec;
- /* If we used chrec_convert_aggressive, we can no longer assume that
- signed chrecs do not overflow, as chrec_convert does, so avoid
- calling it in that case. */
- if (fold_conversions)
- return fold_convert (type, op0);
+ return fold_convert (type, op0);
+ }
+ }
return chrec_convert (type, op0, NULL);
}
@@ -2563,9 +2567,10 @@ instantiate_scev_convert (basic_block instantiate_below,
CACHE is the cache of already instantiated values.
- FOLD_CONVERSIONS should be set to true when the conversions that
- may wrap in signed/pointer type are folded, as long as the value of
- the chrec is preserved.
+ The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+ conversions that may wrap in signed/pointer types are folded, as long
+ as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL,
+ no such folding is done.
SIZE_EXPR is used for computing the size of the expression to be
instantiated, and to stop if it exceeds some limit. */
@@ -2575,7 +2580,7 @@ instantiate_scev_not (basic_block instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
tree chrec,
enum tree_code code, tree type, tree op,
- bool fold_conversions, int size_expr)
+ bool *fold_conversions, int size_expr)
{
tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
inner_loop, op,
@@ -2613,9 +2618,10 @@ instantiate_scev_not (basic_block instantiate_below,
CACHE is the cache of already instantiated values.
- FOLD_CONVERSIONS should be set to true when the conversions that
- may wrap in signed/pointer type are folded, as long as the value of
- the chrec is preserved.
+ The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+ conversions that may wrap in signed/pointer types are folded, as long
+ as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL,
+ no such folding is done.
SIZE_EXPR is used for computing the size of the expression to be
instantiated, and to stop if it exceeds some limit. */
@@ -2624,7 +2630,7 @@ static tree
instantiate_scev_3 (basic_block instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
tree chrec,
- bool fold_conversions, int size_expr)
+ bool *fold_conversions, int size_expr)
{
tree op1, op2;
tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
@@ -2661,9 +2667,10 @@ instantiate_scev_3 (basic_block instantiate_below,
CACHE is the cache of already instantiated values.
- FOLD_CONVERSIONS should be set to true when the conversions that
- may wrap in signed/pointer type are folded, as long as the value of
- the chrec is preserved.
+ The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+ conversions that may wrap in signed/pointer types are folded, as long
+ as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL,
+ no such folding is done.
SIZE_EXPR is used for computing the size of the expression to be
instantiated, and to stop if it exceeds some limit. */
@@ -2672,7 +2679,7 @@ static tree
instantiate_scev_2 (basic_block instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
tree chrec,
- bool fold_conversions, int size_expr)
+ bool *fold_conversions, int size_expr)
{
tree op1;
tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
@@ -2701,9 +2708,10 @@ instantiate_scev_2 (basic_block instantiate_below,
CACHE is the cache of already instantiated values.
- FOLD_CONVERSIONS should be set to true when the conversions that
- may wrap in signed/pointer type are folded, as long as the value of
- the chrec is preserved.
+ The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+ conversions that may wrap in signed/pointer types are folded, as long
+ as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL,
+ no such folding is done.
SIZE_EXPR is used for computing the size of the expression to be
instantiated, and to stop if it exceeds some limit. */
@@ -2712,7 +2720,7 @@ static tree
instantiate_scev_1 (basic_block instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
tree chrec,
- bool fold_conversions, int size_expr)
+ bool *fold_conversions, int size_expr)
{
tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
inner_loop, TREE_OPERAND (chrec, 0),
@@ -2734,9 +2742,10 @@ instantiate_scev_1 (basic_block instantiate_below,
CACHE is the cache of already instantiated values.
- FOLD_CONVERSIONS should be set to true when the conversions that
- may wrap in signed/pointer type are folded, as long as the value of
- the chrec is preserved.
+ The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+ conversions that may wrap in signed/pointer types are folded, as long
+ as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL,
+ no such folding is done.
SIZE_EXPR is used for computing the size of the expression to be
instantiated, and to stop if it exceeds some limit. */
@@ -2745,7 +2754,7 @@ static tree
instantiate_scev_r (basic_block instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
tree chrec,
- bool fold_conversions, int size_expr)
+ bool *fold_conversions, int size_expr)
{
/* Give up if the expression is larger than the MAX that we allow. */
if (size_expr++ > PARAM_VALUE (PARAM_SCEV_MAX_EXPR_SIZE))
@@ -2870,7 +2879,7 @@ instantiate_scev (basic_block instantiate_below, struct loop *evolution_loop,
}
res = instantiate_scev_r (instantiate_below, evolution_loop,
- NULL, chrec, false, 0);
+ NULL, chrec, NULL, 0);
if (destr)
{
@@ -2894,9 +2903,10 @@ instantiate_scev (basic_block instantiate_below, struct loop *evolution_loop,
of an expression. */
tree
-resolve_mixers (struct loop *loop, tree chrec)
+resolve_mixers (struct loop *loop, tree chrec, bool *folded_casts)
{
bool destr = false;
+ bool fold_conversions = false;
if (!global_cache)
{
global_cache = new instantiate_cache_type;
@@ -2904,7 +2914,10 @@ resolve_mixers (struct loop *loop, tree chrec)
}
tree ret = instantiate_scev_r (block_before_loop (loop), loop, NULL,
- chrec, true, 0);
+ chrec, &fold_conversions, 0);
+
+ if (folded_casts && !*folded_casts)
+ *folded_casts = fold_conversions;
if (destr)
{
@@ -3369,7 +3382,8 @@ scev_const_prop (void)
&& !INTEGRAL_TYPE_P (type))
continue;
- ev = resolve_mixers (loop, analyze_scalar_evolution (loop, name));
+ ev = resolve_mixers (loop, analyze_scalar_evolution (loop, name),
+ NULL);
if (!is_gimple_min_invariant (ev)
|| !may_propagate_copy (name, ev))
continue;
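
A sketch of the adjusted calling convention (names are illustrative): callers that care whether may-wrap conversions were folded pass a flag; others pass NULL, as scev_const_prop does above.

  /* Illustrative fragment: NAME is some SSA name of interest.  The
     flag is only ever set, never cleared, so it can accumulate over
     several queries.  */
  bool folded_casts = false;
  tree ev = resolve_mixers (loop, analyze_scalar_evolution (loop, name),
                            &folded_casts);
  if (folded_casts)
    {
      /* EV was simplified by folding conversions that may wrap; it
         must not be used to conclude the evolution cannot overflow.  */
    }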
diff --git a/gcc-4.9/gcc/tree-scalar-evolution.h b/gcc-4.9/gcc/tree-scalar-evolution.h
index 556997684..cb9af5122 100644
--- a/gcc-4.9/gcc/tree-scalar-evolution.h
+++ b/gcc-4.9/gcc/tree-scalar-evolution.h
@@ -31,7 +31,7 @@ extern void scev_reset_htab (void);
extern void scev_finalize (void);
extern tree analyze_scalar_evolution (struct loop *, tree);
extern tree instantiate_scev (basic_block, struct loop *, tree);
-extern tree resolve_mixers (struct loop *, tree);
+extern tree resolve_mixers (struct loop *, tree, bool *);
extern void gather_stats_on_scev_database (void);
extern unsigned int scev_const_prop (void);
extern bool expression_expensive_p (tree);
diff --git a/gcc-4.9/gcc/tree-ssa-loop-ivopts.c b/gcc-4.9/gcc/tree-ssa-loop-ivopts.c
index c5a5dd48a..0bf8e2aff 100644
--- a/gcc-4.9/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc-4.9/gcc/tree-ssa-loop-ivopts.c
@@ -142,9 +142,10 @@ struct iv
tree base_object; /* A memory object to that the induction variable points. */
tree step; /* Step of the iv (constant only). */
tree ssa_name; /* The ssa name with the value. */
+ unsigned use_id; /* The identifier in the use if it is the case. */
bool biv_p; /* Is it a biv? */
bool have_use_for; /* Do we already have a use for it? */
- unsigned use_id; /* The identifier in the use if it is the case. */
+ bool no_overflow; /* True if the iv doesn't overflow. */
};
/* Per-ssa version information (induction variable descriptions, etc.). */
@@ -197,6 +198,7 @@ struct cost_pair
struct iv_use
{
unsigned id; /* The id of the use. */
+ unsigned sub_id; /* The id of the sub use. */
enum use_type type; /* Type of the use. */
struct iv *iv; /* The induction variable it is based on. */
gimple stmt; /* Statement in that it occurs. */
@@ -210,6 +212,11 @@ struct iv_use
struct iv_cand *selected;
/* The selected candidate. */
+
+ struct iv_use *next; /* The next sub use. */
+ tree addr_base; /* Base address with const offset stripped. */
+ unsigned HOST_WIDE_INT addr_offset;
+ /* Const offset stripped from base address. */
};
/* The position where the iv is computed. */
@@ -522,7 +529,11 @@ dump_iv (FILE *file, struct iv *iv)
void
dump_use (FILE *file, struct iv_use *use)
{
- fprintf (file, "use %d\n", use->id);
+ fprintf (file, "use %d", use->id);
+ if (use->sub_id)
+ fprintf (file, ".%d", use->sub_id);
+
+ fprintf (file, "\n");
switch (use->type)
{
@@ -571,8 +582,12 @@ dump_uses (FILE *file, struct ivopts_data *data)
for (i = 0; i < n_iv_uses (data); i++)
{
use = iv_use (data, i);
-
- dump_use (file, use);
+ do
+ {
+ dump_use (file, use);
+ use = use->next;
+ }
+ while (use);
fprintf (file, "\n");
}
}
@@ -929,10 +944,10 @@ determine_base_object (tree expr)
}
/* Allocates an induction variable with given initial value BASE and step STEP
- for loop LOOP. */
+ for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
static struct iv *
-alloc_iv (tree base, tree step)
+alloc_iv (tree base, tree step, bool no_overflow = false)
{
tree base_object = base;
struct iv *iv = XCNEW (struct iv);
@@ -963,21 +978,24 @@ alloc_iv (tree base, tree step)
iv->have_use_for = false;
iv->use_id = 0;
iv->ssa_name = NULL_TREE;
+ iv->no_overflow = no_overflow;
return iv;
}
-/* Sets STEP and BASE for induction variable IV. */
+/* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
+ doesn't overflow. */
static void
-set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
+set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
+ bool no_overflow)
{
struct version_info *info = name_info (data, iv);
gcc_assert (!info->iv);
bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
- info->iv = alloc_iv (base, step);
+ info->iv = alloc_iv (base, step, no_overflow);
info->iv->ssa_name = iv;
}
@@ -999,37 +1017,19 @@ get_iv (struct ivopts_data *data, tree var)
if (!bb
|| !flow_bb_inside_loop_p (data->current_loop, bb))
- set_iv (data, var, var, build_int_cst (type, 0));
+ set_iv (data, var, var, build_int_cst (type, 0), true);
}
return name_info (data, var)->iv;
}
-/* Determines the step of a biv defined in PHI. Returns NULL if PHI does
- not define a simple affine biv with nonzero step. */
-
-static tree
-determine_biv_step (gimple phi)
-{
- struct loop *loop = gimple_bb (phi)->loop_father;
- tree name = PHI_RESULT (phi);
- affine_iv iv;
-
- if (virtual_operand_p (name))
- return NULL_TREE;
-
- if (!simple_iv (loop, loop, name, &iv, true))
- return NULL_TREE;
-
- return integer_zerop (iv.step) ? NULL_TREE : iv.step;
-}
-
/* Finds basic ivs. */
static bool
find_bivs (struct ivopts_data *data)
{
gimple phi;
+ affine_iv iv;
tree step, type, base;
bool found = false;
struct loop *loop = data->current_loop;
@@ -1042,10 +1042,16 @@ find_bivs (struct ivopts_data *data)
if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
continue;
- step = determine_biv_step (phi);
- if (!step)
+ if (virtual_operand_p (PHI_RESULT (phi)))
continue;
+ if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
+ continue;
+
+ if (integer_zerop (iv.step))
+ continue;
+
+ step = iv.step;
base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
base = expand_simple_operations (base);
if (contains_abnormal_ssa_name_p (base)
@@ -1062,7 +1068,7 @@ find_bivs (struct ivopts_data *data)
step = fold_convert (type, step);
}
- set_iv (data, PHI_RESULT (phi), base, step);
+ set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
found = true;
}
@@ -1158,7 +1164,7 @@ find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
if (!find_givs_in_stmt_scev (data, stmt, &iv))
return;
- set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
+ set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}
/* Finds general ivs in basic block BB. */
@@ -1229,33 +1235,88 @@ find_induction_variables (struct ivopts_data *data)
return true;
}
-/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
+/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
+ For an address type use, ADDR_BASE is the stripped IV base and
+ ADDR_OFFSET is the const offset stripped from that base. For uses of
+ other types, ADDR_BASE defaults to NULL and ADDR_OFFSET to zero. */
static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
- gimple stmt, enum use_type use_type)
+ gimple stmt, enum use_type use_type, tree addr_base = NULL,
+ unsigned HOST_WIDE_INT addr_offset = 0)
{
struct iv_use *use = XCNEW (struct iv_use);
use->id = n_iv_uses (data);
+ use->sub_id = 0;
use->type = use_type;
use->iv = iv;
use->stmt = stmt;
use->op_p = use_p;
use->related_cands = BITMAP_ALLOC (NULL);
+ use->next = NULL;
+ use->addr_base = addr_base;
+ use->addr_offset = addr_offset;
/* To avoid showing ssa name in the dumps, if it was not reset by the
caller. */
iv->ssa_name = NULL_TREE;
- if (dump_file && (dump_flags & TDF_DETAILS))
- dump_use (dump_file, use);
-
data->iv_uses.safe_push (use);
return use;
}
+/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
+ The sub use is recorded under the one whose use id is ID_GROUP. */
+
+static struct iv_use *
+record_sub_use (struct ivopts_data *data, tree *use_p,
+ struct iv *iv, gimple stmt, enum use_type use_type,
+ tree addr_base, unsigned HOST_WIDE_INT addr_offset,
+ unsigned int id_group)
+{
+ struct iv_use *use = XCNEW (struct iv_use);
+ struct iv_use *group = iv_use (data, id_group);
+
+ use->id = group->id;
+ use->sub_id = 0;
+ use->type = use_type;
+ use->iv = iv;
+ use->stmt = stmt;
+ use->op_p = use_p;
+ use->related_cands = NULL;
+ use->addr_base = addr_base;
+ use->addr_offset = addr_offset;
+
+ /* Sub use list is maintained in offset ascending order. */
+ if (addr_offset <= group->addr_offset)
+ {
+ use->related_cands = group->related_cands;
+ group->related_cands = NULL;
+ use->next = group;
+ data->iv_uses[id_group] = use;
+ }
+ else
+ {
+ struct iv_use *pre;
+ do
+ {
+ pre = group;
+ group = group->next;
+ }
+ while (group && addr_offset > group->addr_offset);
+ use->next = pre->next;
+ pre->next = use;
+ }
+
+ /* To avoid showing ssa name in the dumps, if it was not reset by the
+ caller. */
+ iv->ssa_name = NULL_TREE;
+
+ return use;
+}
+
/* Checks whether OP is a loop-level invariant and if so, records it.
NONLINEAR_USE is true if the invariant is used in a way we do not
handle specially. */
@@ -1515,6 +1576,7 @@ idx_find_step (tree base, tree *idx, void *data)
{
struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
struct iv *iv;
+ bool use_overflow_semantics = false;
tree step, iv_base, iv_step, lbound, off;
struct loop *loop = dta->ivopts_data->current_loop;
@@ -1574,9 +1636,12 @@ idx_find_step (tree base, tree *idx, void *data)
iv_base = iv->base;
iv_step = iv->step;
+ if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
+ use_overflow_semantics = true;
+
if (!convert_affine_scev (dta->ivopts_data->current_loop,
sizetype, &iv_base, &iv_step, dta->stmt,
- false))
+ use_overflow_semantics))
{
/* The index might wrap. */
return false;
@@ -1739,6 +1804,50 @@ may_be_nonaddressable_p (tree expr)
return false;
}
+static tree
+strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
+
+/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
+ If there is an existing use with the same stripped iv base and step,
+ this function records this one as a sub use of that use; otherwise it
+ records this one as a normal use. */
+
+static struct iv_use *
+record_group_use (struct ivopts_data *data, tree *use_p,
+ struct iv *iv, gimple stmt, enum use_type use_type)
+{
+ unsigned int i;
+ struct iv_use *use;
+ tree addr_base;
+ unsigned HOST_WIDE_INT addr_offset;
+
+ /* Only support sub use for address type uses, that is, with base
+ object. */
+ if (!iv->base_object)
+ return record_use (data, use_p, iv, stmt, use_type);
+
+ addr_base = strip_offset (iv->base, &addr_offset);
+ for (i = 0; i < n_iv_uses (data); i++)
+ {
+ use = iv_use (data, i);
+ if (use->type != USE_ADDRESS || !use->iv->base_object)
+ continue;
+
+ /* Check if it has the same stripped base and step. */
+ if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
+ && operand_equal_p (iv->step, use->iv->step, 0)
+ && operand_equal_p (addr_base, use->addr_base, 0))
+ break;
+ }
+
+ if (i == n_iv_uses (data))
+ return record_use (data, use_p, iv, stmt,
+ use_type, addr_base, addr_offset);
+ else
+ return record_sub_use (data, use_p, iv, stmt,
+ use_type, addr_base, addr_offset, i);
+}
+
/* Finds addresses in *OP_P inside STMT. */
static void
@@ -1849,7 +1958,7 @@ find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p
}
civ = alloc_iv (base, step);
- record_use (data, op_p, civ, stmt, USE_ADDRESS);
+ record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
return;
fail:
@@ -2035,6 +2144,172 @@ find_interesting_uses (struct ivopts_data *data)
free (body);
}
+/* Compute the maximum offset of the [base + offset] addressing mode
+ for the memory reference represented by USE. */
+
+static HOST_WIDE_INT
+compute_max_addr_offset (struct iv_use *use)
+{
+ int width;
+ rtx reg, addr;
+ HOST_WIDE_INT i, off;
+ unsigned list_index, num;
+ addr_space_t as;
+ machine_mode mem_mode, addr_mode;
+ static vec<HOST_WIDE_INT> max_offset_list;
+
+ as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
+ mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
+
+ num = max_offset_list.length ();
+ list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
+ if (list_index >= num)
+ {
+ max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
+ for (; num < max_offset_list.length (); num++)
+ max_offset_list[num] = -1;
+ }
+
+ off = max_offset_list[list_index];
+ if (off != -1)
+ return off;
+
+ addr_mode = targetm.addr_space.address_mode (as);
+ reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
+ addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
+
+ width = GET_MODE_BITSIZE (addr_mode) - 1;
+ if (width > (HOST_BITS_PER_WIDE_INT - 1))
+ width = HOST_BITS_PER_WIDE_INT - 1;
+
+ for (i = width; i > 0; i--)
+ {
+ off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
+ XEXP (addr, 1) = gen_int_mode (off, addr_mode);
+ if (memory_address_addr_space_p (mem_mode, addr, as))
+ break;
+
+ /* For some strict-alignment targets, the offset must be naturally
+ aligned. Try an aligned offset if mem_mode is not QImode. */
+ off = ((unsigned HOST_WIDE_INT) 1 << i);
+ if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
+ {
+ off -= GET_MODE_SIZE (mem_mode);
+ XEXP (addr, 1) = gen_int_mode (off, addr_mode);
+ if (memory_address_addr_space_p (mem_mode, addr, as))
+ break;
+ }
+ }
+ if (i == 0)
+ off = 0;
+
+ max_offset_list[list_index] = off;
+ return off;
+}
+
+/* Check if all small groups should be split. Return true if and
+ only if:
+
+ 1) At least one group contains two uses with different offsets.
+ 2) No group contains more than two uses with different offsets.
+
+ Return false otherwise. We want to split such groups because:
+
+ 1) Small groups don't have much benefit and may interfere with
+ general candidate selection.
+ 2) The problem size with only small groups is usually small and
+ the general algorithm can handle it well.
+
+ TODO -- The above claim may not hold when auto increment is supported. */
+
+static bool
+split_all_small_groups (struct ivopts_data *data)
+{
+ bool split_p = false;
+ unsigned int i, n, distinct;
+ struct iv_use *pre, *use;
+
+ n = n_iv_uses (data);
+ for (i = 0; i < n; i++)
+ {
+ use = iv_use (data, i);
+ if (!use->next)
+ continue;
+
+ distinct = 1;
+ gcc_assert (use->type == USE_ADDRESS);
+ for (pre = use, use = use->next; use; pre = use, use = use->next)
+ {
+ if (pre->addr_offset != use->addr_offset)
+ distinct++;
+
+ if (distinct > 2)
+ return false;
+ }
+ if (distinct == 2)
+ split_p = true;
+ }
+
+ return split_p;
+}
+
+/* For each group of address type uses, this function further groups
+ these uses according to the maximum offset supported by the target's
+ [base + offset] addressing mode. */
+
+static void
+group_address_uses (struct ivopts_data *data)
+{
+ HOST_WIDE_INT max_offset = -1;
+ unsigned int i, n, sub_id;
+ struct iv_use *pre, *use;
+ unsigned HOST_WIDE_INT addr_offset_first;
+
+ /* Reset max offset to split all small groups. */
+ if (split_all_small_groups (data))
+ max_offset = 0;
+
+ n = n_iv_uses (data);
+ for (i = 0; i < n; i++)
+ {
+ use = iv_use (data, i);
+ if (!use->next)
+ continue;
+
+ gcc_assert (use->type == USE_ADDRESS);
+ if (max_offset != 0)
+ max_offset = compute_max_addr_offset (use);
+
+ while (use)
+ {
+ sub_id = 0;
+ addr_offset_first = use->addr_offset;
+ /* Only uses whose offset relative to the first use fits in the
+ supported offset range can be grouped together. */
+ for (pre = use, use = use->next;
+ use && (use->addr_offset - addr_offset_first
+ <= (unsigned HOST_WIDE_INT) max_offset);
+ pre = use, use = use->next)
+ {
+ use->id = pre->id;
+ use->sub_id = ++sub_id;
+ }
+
+ /* Break the list and create new group. */
+ if (use)
+ {
+ pre->next = NULL;
+ use->id = n_iv_uses (data);
+ use->related_cands = BITMAP_ALLOC (NULL);
+ data->iv_uses.safe_push (use);
+ }
+ }
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ dump_uses (dump_file, data);
+}
+
/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
is true, assume we are inside an address. If TOP_COMPREF is true, assume
we are at the top-level of the processed address. */
@@ -2458,6 +2733,8 @@ static void
add_candidate (struct ivopts_data *data,
tree base, tree step, bool important, struct iv_use *use)
{
+ gcc_assert (use == NULL || use->sub_id == 0);
+
if (ip_normal_pos (data->current_loop))
add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
if (ip_end_pos (data->current_loop)
@@ -2687,11 +2964,22 @@ new_cost (unsigned runtime, unsigned complexity)
return cost;
}
+/* Returns true if COST is infinite. */
+
+static bool
+infinite_cost_p (comp_cost cost)
+{
+ return cost.cost == INFTY;
+}
+
/* Adds costs COST1 and COST2. */
static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
+ if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
+ return infinite_cost;
+
cost1.cost += cost2.cost;
cost1.complexity += cost2.complexity;
@@ -2720,14 +3008,6 @@ compare_costs (comp_cost cost1, comp_cost cost2)
return cost1.cost - cost2.cost;
}
-/* Returns true if COST is infinite. */
-
-static bool
-infinite_cost_p (comp_cost cost)
-{
- return cost.cost == INFTY;
-}
-
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
on invariants DEPENDS_ON and that the value used in expressing it
is VALUE, and in case of iv elimination the comparison operator is COMP. */
@@ -4204,7 +4484,15 @@ get_computation_cost_at (struct ivopts_data *data,
cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
}
- if (inv_expr_id)
+ /* The set of invariants depended on by a sub use has already been
+ computed for the first use in the group. */
+ if (use->sub_id)
+ {
+ cost.cost = 0;
+ if (depends_on && *depends_on)
+ bitmap_clear (*depends_on);
+ }
+ else if (inv_expr_id)
{
*inv_expr_id =
get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
@@ -4333,6 +4621,8 @@ determine_use_iv_cost_address (struct ivopts_data *data,
bitmap depends_on;
bool can_autoinc;
int inv_expr_id = -1;
+ struct iv_use *sub_use;
+ comp_cost sub_cost;
comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
&can_autoinc, &inv_expr_id);
@@ -4346,6 +4636,15 @@ determine_use_iv_cost_address (struct ivopts_data *data,
else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
cost = infinite_cost;
}
+ for (sub_use = use->next;
+ sub_use && !infinite_cost_p (cost);
+ sub_use = sub_use->next)
+ {
+ sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
+ &can_autoinc, &inv_expr_id);
+ cost = add_costs (cost, sub_cost);
+ }
+
set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
inv_expr_id);
@@ -6533,8 +6832,8 @@ adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
/* Rewrites USE (address that is an iv) using candidate CAND. */
static void
-rewrite_use_address (struct ivopts_data *data,
- struct iv_use *use, struct iv_cand *cand)
+rewrite_use_address_1 (struct ivopts_data *data,
+ struct iv_use *use, struct iv_cand *cand)
{
aff_tree aff;
gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
@@ -6569,6 +6868,28 @@ rewrite_use_address (struct ivopts_data *data,
*use->op_p = ref;
}
+/* Rewrites USE (address that is an iv) using candidate CAND. If it's the
+ first use of a group, rewrites sub uses in the group too. */
+
+static void
+rewrite_use_address (struct ivopts_data *data,
+ struct iv_use *use, struct iv_cand *cand)
+{
+ struct iv_use *next;
+
+ gcc_assert (use->sub_id == 0);
+ rewrite_use_address_1 (data, use, cand);
+ update_stmt (use->stmt);
+
+ for (next = use->next; next != NULL; next = next->next)
+ {
+ rewrite_use_address_1 (data, next, cand);
+ update_stmt (next->stmt);
+ }
+
+ return;
+}
+
/* Rewrites USE (the condition such that one of the arguments is an iv) using
candidate CAND. */
@@ -6845,6 +7166,18 @@ free_loop_data (struct ivopts_data *data)
for (i = 0; i < n_iv_uses (data); i++)
{
struct iv_use *use = iv_use (data, i);
+ struct iv_use *pre = use, *sub = use->next;
+
+ while (sub)
+ {
+ gcc_assert (sub->related_cands == NULL);
+ gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
+
+ free (sub->iv);
+ pre = sub;
+ sub = sub->next;
+ free (pre);
+ }
free (use->iv);
BITMAP_FREE (use->related_cands);
@@ -6964,6 +7297,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
/* Finds interesting uses (item 1). */
find_interesting_uses (data);
+ group_address_uses (data);
if (n_iv_uses (data) > MAX_CONSIDERED_USES)
goto finish;
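
To make the grouping concrete, a small illustrative input (assuming 4-byte int and a target whose [base + offset] addressing mode covers these offsets): find_interesting_uses_address records each store through record_group_use, which chains uses with the same stripped base and step, and group_address_uses then keeps them as one group.

  void
  foo (int *p, int n)
  {
    int i;
    for (i = 0; i < n; i++)
      {
        p[i] = 0;      /* use 0   -- group leader, addr_offset 0.  */
        p[i + 1] = 0;  /* use 0.1 -- same base/step, addr_offset 4.  */
        p[i + 2] = 0;  /* use 0.2 -- costed with the leader only.  */
      }
  }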
diff --git a/gcc-4.9/gcc/tree-ssa-loop-niter.c b/gcc-4.9/gcc/tree-ssa-loop-niter.c
index 8fb72b6a5..cb9214fbb 100644
--- a/gcc-4.9/gcc/tree-ssa-loop-niter.c
+++ b/gcc-4.9/gcc/tree-ssa-loop-niter.c
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
+#include "stor-layout.h"
#include "calls.h"
#include "expr.h"
#include "tm_p.h"
@@ -55,7 +56,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-pass.h"
#include "stringpool.h"
#include "tree-ssanames.h"
-
+#include "ggc.h"
#define SWAP(X, Y) do { affine_iv *tmp = (X); (X) = (Y); (Y) = tmp; } while (0)
@@ -1161,6 +1162,7 @@ number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1,
iv1->base, iv0->base);
niter->niter = delta;
niter->max = mpz_get_double_int (niter_type, bnds->up, false);
+ niter->control.no_overflow = true;
return true;
}
@@ -1938,6 +1940,9 @@ number_of_iterations_exit (struct loop *loop, edge exit,
return false;
niter->assumptions = boolean_false_node;
+ niter->control.base = NULL_TREE;
+ niter->control.step = NULL_TREE;
+ niter->control.no_overflow = false;
stmt = last_stmt (exit->src);
if (!stmt || gimple_code (stmt) != GIMPLE_COND)
return false;
@@ -2714,6 +2719,29 @@ record_estimate (struct loop *loop, tree bound, double_int i_bound,
record_niter_bound (loop, i_bound, realistic, upper);
}
+/* Records the control iv analyzed in NITER for LOOP if the iv is valid
+ and doesn't overflow. */
+
+static void
+record_control_iv (struct loop *loop, struct tree_niter_desc *niter)
+{
+ struct control_iv *iv;
+
+ if (!niter->control.base || !niter->control.step)
+ return;
+
+ if (!integer_onep (niter->assumptions) || !niter->control.no_overflow)
+ return;
+
+ iv = ggc_alloc_control_iv ();
+ iv->base = niter->control.base;
+ iv->step = niter->control.step;
+ iv->next = loop->control_ivs;
+ loop->control_ivs = iv;
+
+ return;
+}
+
/* Record the estimate on number of iterations of LOOP based on the fact that
the induction variable BASE + STEP * i evaluated in STMT does not wrap and
its values belong to the range <LOW, HIGH>. REALISTIC is true if the
@@ -3452,6 +3480,7 @@ estimate_numbers_of_iterations_loop (struct loop *loop)
record_estimate (loop, niter, niter_desc.max,
last_stmt (ex->src),
true, ex == likely_exit, true);
+ record_control_iv (loop, &niter_desc);
}
exits.release ();
@@ -3759,6 +3788,189 @@ nowrap_type_p (tree type)
return false;
}
+/* Return true if we can prove LOOP is exited before evolution of induction
+ variable {BASE, STEP} overflows with respect to its type bound. */
+
+static bool
+loop_exits_before_overflow (tree base, tree step,
+ gimple at_stmt, struct loop *loop)
+{
+ double_int niter;
+ struct control_iv *civ;
+ struct nb_iter_bound *bound;
+ tree e, delta, step_abs, unsigned_base;
+ tree type = TREE_TYPE (step);
+ tree unsigned_type, valid_niter;
+
+ /* Don't issue signed overflow warnings. */
+ fold_defer_overflow_warnings ();
+
+ /* Compute the number of iterations before we reach the bound of the
+ type, and verify that the loop is exited before this occurs. */
+ unsigned_type = unsigned_type_for (type);
+ unsigned_base = fold_convert (unsigned_type, base);
+
+ if (tree_int_cst_sign_bit (step))
+ {
+ tree extreme = fold_convert (unsigned_type,
+ lower_bound_in_type (type, type));
+ delta = fold_build2 (MINUS_EXPR, unsigned_type, unsigned_base, extreme);
+ step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
+ fold_convert (unsigned_type, step));
+ }
+ else
+ {
+ tree extreme = fold_convert (unsigned_type,
+ upper_bound_in_type (type, type));
+ delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, unsigned_base);
+ step_abs = fold_convert (unsigned_type, step);
+ }
+
+ valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
+
+ estimate_numbers_of_iterations_loop (loop);
+
+ if (max_loop_iterations (loop, &niter)
+ && double_int_fits_to_tree_p (TREE_TYPE (valid_niter), niter)
+ && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
+ double_int_to_tree (TREE_TYPE (valid_niter),
+ niter))) != NULL
+ && integer_nonzerop (e))
+ {
+ fold_undefer_and_ignore_overflow_warnings ();
+ return true;
+ }
+ if (at_stmt)
+ for (bound = loop->bounds; bound; bound = bound->next)
+ {
+ if (n_of_executions_at_most (at_stmt, bound, valid_niter))
+ {
+ fold_undefer_and_ignore_overflow_warnings ();
+ return true;
+ }
+ }
+ fold_undefer_and_ignore_overflow_warnings ();
+
+ /* Try to prove the loop is exited before {base, step} overflows with
+ the help of the analyzed loop control IVs. This is done only for IVs
+ with constant step because otherwise we don't have the information. */
+ if (TREE_CODE (step) == INTEGER_CST)
+ for (civ = loop->control_ivs; civ; civ = civ->next)
+ {
+ enum tree_code code;
+ tree stepped, extreme, civ_type = TREE_TYPE (civ->step);
+
+ /* Have to consider type difference because operand_equal_p ignores
+ that for constants. */
+ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (civ_type)
+ || element_precision (type) != element_precision (civ_type))
+ continue;
+
+ /* Only consider control IV with same step. */
+ if (!operand_equal_p (step, civ->step, 0))
+ continue;
+
+ /* Done proving if this is a no-overflow control IV. */
+ if (operand_equal_p (base, civ->base, 0))
+ return true;
+
+ /* If this is a before stepping control IV, in other words, we have
+
+ {civ_base, step} = {base + step, step}
+
+ Because civ {base + step, step} doesn't overflow during loop
+ iterations, {base, step} will not overflow if we can prove the
+ operation "base + step" does not overflow. Specifically, we try
+ to prove that the conditions below are satisfied:
+
+ base <= UPPER_BOUND (type) - step ;;step > 0
+ base >= LOWER_BOUND (type) - step ;;step < 0
+
+ by proving the reverse conditions are false using the loop's
+ initial condition. */
+ stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
+ if (operand_equal_p (stepped, civ->base, 0))
+ {
+ if (tree_int_cst_sign_bit (step))
+ {
+ code = LT_EXPR;
+ extreme = lower_bound_in_type (type, type);
+ }
+ else
+ {
+ code = GT_EXPR;
+ extreme = upper_bound_in_type (type, type);
+ }
+ extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
+ e = fold_build2 (code, boolean_type_node, base, extreme);
+ e = simplify_using_initial_conditions (loop, e);
+ if (integer_zerop (e))
+ return true;
+
+ continue;
+ }
+
+ /* Similar to above, only in this case we have:
+
+ {civ_base, step} = {(signed T)((unsigned T)base + step), step}
+ && TREE_TYPE (civ_base) = signed T.
+
+ We prove that the condition below is satisfied:
+
+ (signed T)((unsigned T)base + step)
+ == (signed T)(unsigned T)base + step
+ == base + step
+
+ for exactly the same reason as above. This also proves there is
+ no overflow in the operation "base + step", and thus none in the
+ induction variable {base, step} during loop iterations.
+
+ This is necessary to handle cases like the one below:
+
+ int foo (int *a, signed char s, signed char l)
+ {
+ signed char i;
+ for (i = s; i < l; i++)
+ a[i] = 0;
+ return 0;
+ }
+
+ The variable I is first converted to type unsigned char,
+ incremented, then converted back to type signed char. */
+ if (!CONVERT_EXPR_P (civ->base) || TREE_TYPE (civ->base) != type)
+ continue;
+ e = TREE_OPERAND (civ->base, 0);
+ if (TREE_CODE (e) != PLUS_EXPR
+ || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST
+ || !operand_equal_p (step,
+ fold_convert (type,
+ TREE_OPERAND (e, 1)), 0))
+ continue;
+ e = TREE_OPERAND (e, 0);
+ if (!CONVERT_EXPR_P (e) || !operand_equal_p (e, unsigned_base, 0))
+ continue;
+ e = TREE_OPERAND (e, 0);
+ gcc_assert (operand_equal_p (e, base, 0));
+ if (tree_int_cst_sign_bit (step))
+ {
+ code = LT_EXPR;
+ extreme = lower_bound_in_type (type, type);
+ }
+ else
+ {
+ code = GT_EXPR;
+ extreme = upper_bound_in_type (type, type);
+ }
+ extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
+ e = fold_build2 (code, boolean_type_node, base, extreme);
+ e = simplify_using_initial_conditions (loop, e);
+ if (integer_zerop (e))
+ return true;
+ }
+
+ return false;
+}
+
/* Return false only when the induction variable BASE + STEP * I is
known to not overflow: i.e. when the number of iterations is small
enough with respect to the step and initial condition in order to
@@ -3774,13 +3986,6 @@ scev_probably_wraps_p (tree base, tree step,
gimple at_stmt, struct loop *loop,
bool use_overflow_semantics)
{
- tree delta, step_abs;
- tree unsigned_type, valid_niter;
- tree type = TREE_TYPE (step);
- tree e;
- double_int niter;
- struct nb_iter_bound *bound;
-
/* FIXME: We really need something like
http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
@@ -3814,56 +4019,8 @@ scev_probably_wraps_p (tree base, tree step,
if (TREE_CODE (step) != INTEGER_CST)
return true;
- /* Don't issue signed overflow warnings. */
- fold_defer_overflow_warnings ();
-
- /* Otherwise, compute the number of iterations before we reach the
- bound of the type, and verify that the loop is exited before this
- occurs. */
- unsigned_type = unsigned_type_for (type);
- base = fold_convert (unsigned_type, base);
-
- if (tree_int_cst_sign_bit (step))
- {
- tree extreme = fold_convert (unsigned_type,
- lower_bound_in_type (type, type));
- delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
- step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
- fold_convert (unsigned_type, step));
- }
- else
- {
- tree extreme = fold_convert (unsigned_type,
- upper_bound_in_type (type, type));
- delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
- step_abs = fold_convert (unsigned_type, step);
- }
-
- valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
-
- estimate_numbers_of_iterations_loop (loop);
-
- if (max_loop_iterations (loop, &niter)
- && double_int_fits_to_tree_p (TREE_TYPE (valid_niter), niter)
- && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
- double_int_to_tree (TREE_TYPE (valid_niter),
- niter))) != NULL
- && integer_nonzerop (e))
- {
- fold_undefer_and_ignore_overflow_warnings ();
- return false;
- }
- if (at_stmt)
- for (bound = loop->bounds; bound; bound = bound->next)
- {
- if (n_of_executions_at_most (at_stmt, bound, valid_niter))
- {
- fold_undefer_and_ignore_overflow_warnings ();
- return false;
- }
- }
-
- fold_undefer_and_ignore_overflow_warnings ();
+ if (loop_exits_before_overflow (base, step, at_stmt, loop))
+ return false;
/* At this point we still don't have a proof that the iv does not
overflow: give up. */
@@ -3875,17 +4032,26 @@ scev_probably_wraps_p (tree base, tree step,
void
free_numbers_of_iterations_estimates_loop (struct loop *loop)
{
- struct nb_iter_bound *bound, *next;
+ struct control_iv *civ;
+ struct nb_iter_bound *bound;
loop->nb_iterations = NULL;
loop->estimate_state = EST_NOT_COMPUTED;
- for (bound = loop->bounds; bound; bound = next)
+ for (bound = loop->bounds; bound;)
{
- next = bound->next;
+ struct nb_iter_bound *next = bound->next;
ggc_free (bound);
+ bound = next;
}
-
loop->bounds = NULL;
+
+ for (civ = loop->control_ivs; civ;)
+ {
+ struct control_iv *next = civ->next;
+ ggc_free (civ);
+ civ = next;
+ }
+ loop->control_ivs = NULL;
}
/* Frees the information on upper bounds on numbers of iterations of loops. */
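
A worked instance of the valid_niter check in loop_exits_before_overflow above, for the IV {(signed char) -10, +, 1} (numbers follow the unsigned char arithmetic the code uses):

  /* unsigned_base = (unsigned char) -10 = 246
     extreme       = (unsigned char) 127 = 127
     delta         = (127 - 246) mod 256 = 137  (distance to the bound)
     step_abs      = 1
     valid_niter   = 137 / 1 = 137

     If max_loop_iterations bounds the loop by, say, 100, then
     valid_niter > niter (137 > 100) holds and the function returns
     true: the IV cannot reach the type bound before the loop exits.  */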
diff --git a/gcc-4.9/gcc/tree-ssa-loop-niter.h b/gcc-4.9/gcc/tree-ssa-loop-niter.h
index df0d64d02..dd2535851 100644
--- a/gcc-4.9/gcc/tree-ssa-loop-niter.h
+++ b/gcc-4.9/gcc/tree-ssa-loop-niter.h
@@ -41,6 +41,7 @@ extern void estimate_numbers_of_iterations (void);
extern bool stmt_dominates_stmt_p (gimple, gimple);
extern bool nowrap_type_p (tree);
extern bool scev_probably_wraps_p (tree, tree, gimple, struct loop *, bool);
+extern void free_loop_control_ivs (struct loop *);
extern void free_numbers_of_iterations_estimates_loop (struct loop *);
extern void free_numbers_of_iterations_estimates (void);
extern void substitute_in_loop_info (struct loop *, tree, tree);
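
free_loop_control_ivs is declared here but its definition is not in the hunks above; a minimal sketch consistent with the freeing loop added to free_numbers_of_iterations_estimates_loop (an assumption, not the patch's actual body):

  void
  free_loop_control_ivs (struct loop *loop)
  {
    struct control_iv *civ;

    /* Assumed to mirror the control-iv freeing loop above.  */
    for (civ = loop->control_ivs; civ;)
      {
        struct control_iv *next = civ->next;
        ggc_free (civ);
        civ = next;
      }
    loop->control_ivs = NULL;
  }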