aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.9/gcc/tree-vect-patterns.c
diff options
context:
space:
mode:
author: Rong Xu <xur@google.com> 2014-07-21 16:47:22 -0700
committer: Rong Xu <xur@google.com> 2014-07-29 15:31:03 -0700
commit: 38a8aecfb882072900434499696b5c32a2274515 (patch)
tree: 2aac97f0ae24b03cd98c1a06e989c031c173f889 /gcc-4.9/gcc/tree-vect-patterns.c
parent: c231900e5dcc14d8296bd9f62b45997a49d4d5e7 (diff)
download: toolchain_gcc-38a8aecfb882072900434499696b5c32a2274515.tar.gz
toolchain_gcc-38a8aecfb882072900434499696b5c32a2274515.tar.bz2
toolchain_gcc-38a8aecfb882072900434499696b5c32a2274515.zip
[4.9] Switch gcc-4.9 to use google/gcc-4_9 branch.
This source drop uses svn version r212828 of the google/gcc-4.9 branch. We also cherry-picked r213062, r213063 and r213064 to fix Windows build issues. All gcc-4.9 patches before July 3rd are ported to google/gcc-4.9. The following prior commits have not been merged to the google branch yet (they are included in this commit): e7af147f979e657fe2df00808e5b4319b0e088c6, baf87df3cb2683649ba7e9872362a7e721117c23, and c231900e5dcc14d8296bd9f62b45997a49d4d5e7. Change-Id: I4bea3ea470387ff751c2be4cb0d4a12059b9299b
Diffstat (limited to 'gcc-4.9/gcc/tree-vect-patterns.c')
-rw-r--r-- gcc-4.9/gcc/tree-vect-patterns.c 244
1 files changed, 239 insertions, 5 deletions
diff --git a/gcc-4.9/gcc/tree-vect-patterns.c b/gcc-4.9/gcc/tree-vect-patterns.c
index 5db023fc4..2f2eeed7c 100644
--- a/gcc-4.9/gcc/tree-vect-patterns.c
+++ b/gcc-4.9/gcc/tree-vect-patterns.c
@@ -57,6 +57,8 @@ static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
tree *);
+static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
+ tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
tree *);
@@ -74,6 +76,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
vect_recog_widen_mult_pattern,
vect_recog_widen_sum_pattern,
vect_recog_dot_prod_pattern,
+ vect_recog_sad_pattern,
vect_recog_pow_pattern,
vect_recog_widen_shift_pattern,
vect_recog_over_widening_pattern,
@@ -152,9 +155,8 @@ vect_single_imm_use (gimple def_stmt)
}
/* Check whether NAME, an ssa-name used in USE_STMT,
- is a result of a type promotion or demotion, such that:
+ is a result of a type promotion, such that:
DEF_STMT: NAME = NOP (name0)
- where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
If CHECK_SIGN is TRUE, check that either both types are signed or both are
unsigned. */
@@ -201,10 +203,8 @@ type_conversion_p (tree name, gimple use_stmt, bool check_sign,
if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
*promotion = true;
- else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
- *promotion = false;
else
- return false;
+ *promotion = false;
if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
bb_vinfo, &dummy_gimple, &dummy, &dt))
@@ -445,6 +445,240 @@ vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
}
+/* Function vect_recog_sad_pattern
+
+ Try to find the following Sum of Absolute Difference (SAD) pattern:
+
+ type x_t, y_t;
+ signed TYPE1 diff, abs_diff;
+ TYPE2 sum = init;
+ loop:
+ sum_0 = phi <init, sum_1>
+ S1 x_t = ...
+ S2 y_t = ...
+ S3 x_T = (TYPE1) x_t;
+ S4 y_T = (TYPE1) y_t;
+ S5 diff = x_T - y_T;
+ S6 abs_diff = ABS_EXPR <diff>;
+ [S7 abs_diff = (TYPE2) abs_diff; #optional]
+ S8 sum_1 = abs_diff + sum_0;
+
+ where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
+ same size as 'TYPE1' or bigger. This is a special case of a reduction
+ computation.
+
+ Input:
+
+ * STMTS: Contains a stmt from which the pattern search begins. In the
+ example, when this function is called with S8, the pattern
+ {S3,S4,S5,S6,S7,S8} will be detected.
+
+ Output:
+
+ * TYPE_IN: The type of the input arguments to the pattern.
+
+ * TYPE_OUT: The type of the output of this pattern.
+
+ * Return value: A new stmt that will be used to replace the sequence of
+ stmts that constitute the pattern. In this case it will be:
+ SAD_EXPR <x_t, y_t, sum_0>
+ */
+
+static gimple
+vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
+ tree *type_out)
+{
+ gimple last_stmt = (*stmts)[0];
+ tree sad_oprnd0, sad_oprnd1;
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+ tree half_type;
+ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+ struct loop *loop;
+ bool promotion;
+
+ if (!loop_info)
+ return NULL;
+
+ loop = LOOP_VINFO_LOOP (loop_info);
+
+ if (!is_gimple_assign (last_stmt))
+ return NULL;
+
+ tree sum_type = gimple_expr_type (last_stmt);
+
+ /* Look for the following pattern
+ DX = (TYPE1) X;
+ DY = (TYPE1) Y;
+ DDIFF = DX - DY;
+ DAD = ABS_EXPR <DDIFF>;
+ [DAD = (TYPE2) DAD; #optional]
+ sum_1 = DAD + sum_0;
+ In which
+ - DX is at least double the size of X
+ - DY is at least double the size of Y
+ - DX, DY, DDIFF, DAD all have the same type
+ - sum is the same size as DAD or bigger
+ - sum has been recognized as a reduction variable.
+
+ This is equivalent to:
+ DDIFF = X w- Y; #widen sub
+ DAD = ABS_EXPR <DDIFF>;
+ sum_1 = DAD w+ sum_0; #widen summation
+ or
+ DDIFF = X w- Y; #widen sub
+ DAD = ABS_EXPR <DDIFF>;
+ sum_1 = DAD + sum_0; #summation
+ */
+
+ /* Starting from LAST_STMT, follow the defs of its uses in search
+ of the above pattern. */
+
+ if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
+ return NULL;
+
+ tree plus_oprnd0, plus_oprnd1;
+
+ if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
+ {
+ /* Has been detected as widening-summation? */
+
+ gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
+ sum_type = gimple_expr_type (stmt);
+ if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
+ return NULL;
+ plus_oprnd0 = gimple_assign_rhs1 (stmt);
+ plus_oprnd1 = gimple_assign_rhs2 (stmt);
+ half_type = TREE_TYPE (plus_oprnd0);
+ }
+ else
+ {
+ gimple def_stmt;
+
+ if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
+ return NULL;
+ plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
+ plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
+ if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
+ || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
+ return NULL;
+
+ /* The type conversion could be promotion, demotion,
+ or just signed -> unsigned. */
+ if (type_conversion_p (plus_oprnd0, last_stmt, false,
+ &half_type, &def_stmt, &promotion))
+ plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
+ else
+ half_type = sum_type;
+ }
+
+ /* So far so good. Since last_stmt was detected as a (summation) reduction,
+ we know that plus_oprnd1 is the reduction variable (defined by a loop-header
+ phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
+ Then check that plus_oprnd0 is defined by an abs_expr. */
+
+ if (TREE_CODE (plus_oprnd0) != SSA_NAME)
+ return NULL;
+
+ tree abs_type = half_type;
+ gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
+
+ /* It could not be the sad pattern if the abs_stmt is outside the loop. */
+ if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
+ return NULL;
+
+ /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
+ inside the loop (in case we are analyzing an outer-loop). */
+ if (!is_gimple_assign (abs_stmt))
+ return NULL;
+
+ stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
+ gcc_assert (abs_stmt_vinfo);
+ if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
+ return NULL;
+ if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
+ return NULL;
+
+ tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
+ if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
+ return NULL;
+ if (TYPE_UNSIGNED (abs_type))
+ return NULL;
+
+ /* We then detect if the operand of abs_expr is defined by a minus_expr. */
+
+ if (TREE_CODE (abs_oprnd) != SSA_NAME)
+ return NULL;
+
+ gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
+
+ /* It could not be the sad pattern if the diff_stmt is outside the loop. */
+ if (!gimple_bb (diff_stmt)
+ || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
+ return NULL;
+
+ /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
+ inside the loop (in case we are analyzing an outer-loop). */
+ if (!is_gimple_assign (diff_stmt))
+ return NULL;
+
+ stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
+ gcc_assert (diff_stmt_vinfo);
+ if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
+ return NULL;
+ if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
+ return NULL;
+
+ tree half_type0, half_type1;
+ gimple def_stmt;
+
+ tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
+ tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
+
+ if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
+ || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
+ return NULL;
+ if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
+ &half_type0, &def_stmt, &promotion)
+ || !promotion)
+ return NULL;
+ sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
+
+ if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
+ &half_type1, &def_stmt, &promotion)
+ || !promotion)
+ return NULL;
+ sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
+
+ if (!types_compatible_p (half_type0, half_type1))
+ return NULL;
+ if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
+ || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
+ return NULL;
+
+ *type_in = TREE_TYPE (sad_oprnd0);
+ *type_out = sum_type;
+
+ /* Pattern detected. Create a stmt to be used to replace the pattern: */
+ tree var = vect_recog_temp_ssa_var (sum_type, NULL);
+ gimple pattern_stmt = gimple_build_assign_with_ops
+ (SAD_EXPR, var, sad_oprnd0, sad_oprnd1, plus_oprnd1);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vect_recog_sad_pattern: detected: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
+ dump_printf (MSG_NOTE, "\n");
+ }
+
+ /* We don't allow changing the order of the computation in the inner-loop
+ when doing outer-loop vectorization. */
+ gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
+
+ return pattern_stmt;
+}
+
+
/* Handle widening operation by a constant. At the moment we support MULT_EXPR
and LSHIFT_EXPR.