about | summary | refs | log | tree | commit | diff | stats
path: root/gcc-4.9/gcc/tree-vect-data-refs.c
diff options
context:
space:
mode:
author: Alexander Ivchenko <alexander.ivchenko@intel.com> 2015-03-16 10:30:57 +0300
committer: Alexander Ivchenko <alexander.ivchenko@intel.com> 2015-03-17 13:03:08 +0300
commit: 3951a3654b8197466bee3e6732b3bc94e4018f68 (patch)
tree: 5f71295bf4a3df8c9d8187ae983591466ff82a86 /gcc-4.9/gcc/tree-vect-data-refs.c
parent: 8075018d7ad15059179e6ff7d0dd12071e1749b9 (diff)
download: toolchain_gcc-3951a3654b8197466bee3e6732b3bc94e4018f68.tar.gz
toolchain_gcc-3951a3654b8197466bee3e6732b3bc94e4018f68.tar.bz2
toolchain_gcc-3951a3654b8197466bee3e6732b3bc94e4018f68.zip
[4.9] Several improvements in code generation for x86. Backport from trunk.
2014-11-21  Evgeny Stupachenko  <evstupac@gmail.com>

	PR target/60451
	* config/i386/i386.c (expand_vec_perm_even_odd_pack): New.
	(expand_vec_perm_even_odd_1): Add new expand for V8HI mode,
	replace for V16QI, V16HI and V32QI modes.
	(ix86_expand_vec_perm_const_1): Add new expand.

2014-06-11  Evgeny Stupachenko  <evstupac@gmail.com>

	* tree-vect-data-refs.c (vect_grouped_store_supported): New
	check for stores group of length 3.
	(vect_permute_store_chain): New permutations for stores group of
	length 3.
	* tree-vect-stmts.c (vect_model_store_cost): Change cost
	of vec_perm_shuffle for the new permutations.

2014-11-28  Evgeny Stupachenko  <evstupac@gmail.com>

	* tree-vect-data-refs.c (vect_transform_grouped_load): Limit shift
	permutations to loads group of size 3.

2014-12-18  Bin Cheng  <bin.cheng@arm.com>

	PR tree-optimization/62178
	* tree-ssa-loop-ivopts.c (cheaper_cost_with_cand): New function.
	(iv_ca_replace): New function.
	(try_improve_iv_set): New parameter try_replace_p.
	Break local optimal fixed-point by calling iv_ca_replace.
	(find_optimal_iv_set_1): Pass new argument to try_improve_iv_set.

Change-Id: I5dca8236d3807cedc5e09d7eda65f0ccec9f5cb2
Signed-off-by: Alexander Ivchenko <alexander.ivchenko@intel.com>
Diffstat (limited to 'gcc-4.9/gcc/tree-vect-data-refs.c')
-rw-r--r-- gcc-4.9/gcc/tree-vect-data-refs.c 222
1 files changed, 174 insertions, 48 deletions
diff --git a/gcc-4.9/gcc/tree-vect-data-refs.c b/gcc-4.9/gcc/tree-vect-data-refs.c
index 853b89a4a..7af32d1ff 100644
--- a/gcc-4.9/gcc/tree-vect-data-refs.c
+++ b/gcc-4.9/gcc/tree-vect-data-refs.c
@@ -4391,13 +4391,14 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
enum machine_mode mode = TYPE_MODE (vectype);
- /* vect_permute_store_chain requires the group size to be a power of two. */
- if (exact_log2 (count) == -1)
+ /* vect_permute_store_chain requires the group size to be equal to 3 or
+ be a power of two. */
+ if (count != 3 && exact_log2 (count) == -1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "the size of the group of accesses"
- " is not a power of 2\n");
+ "the size of the group of accesses"
+ " is not a power of 2 or not eqaul to 3\n");
return false;
}
@@ -4406,23 +4407,76 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
unsigned int i, nelt = GET_MODE_NUNITS (mode);
unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
- for (i = 0; i < nelt / 2; i++)
+
+ if (count == 3)
{
- sel[i * 2] = i;
- sel[i * 2 + 1] = i + nelt;
+ unsigned int j0 = 0, j1 = 0, j2 = 0;
+ unsigned int i, j;
+
+ for (j = 0; j < 3; j++)
+ {
+ int nelt0 = ((3 - j) * nelt) % 3;
+ int nelt1 = ((3 - j) * nelt + 1) % 3;
+ int nelt2 = ((3 - j) * nelt + 2) % 3;
+ for (i = 0; i < nelt; i++)
+ {
+ if (3 * i + nelt0 < nelt)
+ sel[3 * i + nelt0] = j0++;
+ if (3 * i + nelt1 < nelt)
+ sel[3 * i + nelt1] = nelt + j1++;
+ if (3 * i + nelt2 < nelt)
+ sel[3 * i + nelt2] = 0;
+ }
+ if (!can_vec_perm_p (mode, false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf (MSG_MISSED_OPTIMIZATION,
+ "permutaion op not supported by target.\n");
+ return false;
+ }
+
+ for (i = 0; i < nelt; i++)
+ {
+ if (3 * i + nelt0 < nelt)
+ sel[3 * i + nelt0] = 3 * i + nelt0;
+ if (3 * i + nelt1 < nelt)
+ sel[3 * i + nelt1] = 3 * i + nelt1;
+ if (3 * i + nelt2 < nelt)
+ sel[3 * i + nelt2] = nelt + j2++;
+ }
+ if (!can_vec_perm_p (mode, false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf (MSG_MISSED_OPTIMIZATION,
+ "permutaion op not supported by target.\n");
+ return false;
+ }
+ }
+ return true;
}
- if (can_vec_perm_p (mode, false, sel))
+ else
{
- for (i = 0; i < nelt; i++)
- sel[i] += nelt / 2;
- if (can_vec_perm_p (mode, false, sel))
- return true;
+ /* If length is not equal to 3 then only power of 2 is supported. */
+ gcc_assert (exact_log2 (count) != -1);
+
+ for (i = 0; i < nelt / 2; i++)
+ {
+ sel[i * 2] = i;
+ sel[i * 2 + 1] = i + nelt;
+ }
+ if (can_vec_perm_p (mode, false, sel))
+ {
+ for (i = 0; i < nelt; i++)
+ sel[i] += nelt / 2;
+ if (can_vec_perm_p (mode, false, sel))
+ return true;
+ }
}
}
if (dump_enabled_p ())
dump_printf (MSG_MISSED_OPTIMIZATION,
- "interleave op not supported by target.\n");
+ "permutaion op not supported by target.\n");
return false;
}
@@ -4442,9 +4496,9 @@ vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
/* Function vect_permute_store_chain.
Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
- a power of 2, generate interleave_high/low stmts to reorder the data
- correctly for the stores. Return the final references for stores in
- RESULT_CHAIN.
+ a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
+ the data correctly for the stores. Return the final references for stores
+ in RESULT_CHAIN.
E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
The input is 4 vectors each containing 8 elements. We assign a number to
@@ -4511,7 +4565,9 @@ vect_permute_store_chain (vec<tree> dr_chain,
gimple perm_stmt;
tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
tree perm_mask_low, perm_mask_high;
- unsigned int i, n;
+ tree data_ref;
+ tree perm3_mask_low, perm3_mask_high;
+ unsigned int i, n, log_length = exact_log2 (length);
unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
@@ -4519,47 +4575,116 @@ vect_permute_store_chain (vec<tree> dr_chain,
memcpy (result_chain->address (), dr_chain.address (),
length * sizeof (tree));
- for (i = 0, n = nelt / 2; i < n; i++)
+ if (length == 3)
{
- sel[i * 2] = i;
- sel[i * 2 + 1] = i + nelt;
- }
- perm_mask_high = vect_gen_perm_mask (vectype, sel);
- gcc_assert (perm_mask_high != NULL);
+ unsigned int j0 = 0, j1 = 0, j2 = 0;
- for (i = 0; i < nelt; i++)
- sel[i] += nelt / 2;
- perm_mask_low = vect_gen_perm_mask (vectype, sel);
- gcc_assert (perm_mask_low != NULL);
+ for (j = 0; j < 3; j++)
+ {
+ int nelt0 = ((3 - j) * nelt) % 3;
+ int nelt1 = ((3 - j) * nelt + 1) % 3;
+ int nelt2 = ((3 - j) * nelt + 2) % 3;
- for (i = 0, n = exact_log2 (length); i < n; i++)
- {
- for (j = 0; j < length/2; j++)
- {
- vect1 = dr_chain[j];
- vect2 = dr_chain[j+length/2];
+ for (i = 0; i < nelt; i++)
+ {
+ if (3 * i + nelt0 < nelt)
+ sel[3 * i + nelt0] = j0++;
+ if (3 * i + nelt1 < nelt)
+ sel[3 * i + nelt1] = nelt + j1++;
+ if (3 * i + nelt2 < nelt)
+ sel[3 * i + nelt2] = 0;
+ }
+ perm3_mask_low = vect_gen_perm_mask (vectype, sel);
+ gcc_assert (perm3_mask_low != NULL);
+
+ for (i = 0; i < nelt; i++)
+ {
+ if (3 * i + nelt0 < nelt)
+ sel[3 * i + nelt0] = 3 * i + nelt0;
+ if (3 * i + nelt1 < nelt)
+ sel[3 * i + nelt1] = 3 * i + nelt1;
+ if (3 * i + nelt2 < nelt)
+ sel[3 * i + nelt2] = nelt + j2++;
+ }
+ perm3_mask_high = vect_gen_perm_mask (vectype, sel);
+ gcc_assert (perm3_mask_high != NULL);
+
+ vect1 = dr_chain[0];
+ vect2 = dr_chain[1];
/* Create interleaving stmt:
- high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
- high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
- perm_stmt
- = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
- vect1, vect2, perm_mask_high);
+ low = VEC_PERM_EXPR <vect1, vect2,
+ {j, nelt, *, j + 1, nelt + j + 1, *,
+ j + 2, nelt + j + 2, *, ...}> */
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
+ perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
+ vect1, vect2,
+ perm3_mask_low);
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[2*j] = high;
+ vect1 = data_ref;
+ vect2 = dr_chain[2];
/* Create interleaving stmt:
- low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
- nelt*3/2+1, ...}> */
- low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
- perm_stmt
- = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
- vect1, vect2, perm_mask_low);
+ low = VEC_PERM_EXPR <vect1, vect2,
+ {0, 1, nelt + j, 3, 4, nelt + j + 1,
+ 6, 7, nelt + j + 2, ...}> */
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
+ perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
+ vect1, vect2,
+ perm3_mask_high);
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[2*j+1] = low;
+ (*result_chain)[j] = data_ref;
}
- memcpy (dr_chain.address (), result_chain->address (),
- length * sizeof (tree));
+ }
+ else
+ {
+ /* If length is not equal to 3 then only power of 2 is supported. */
+ gcc_assert (exact_log2 (length) != -1);
+
+ for (i = 0, n = nelt / 2; i < n; i++)
+ {
+ sel[i * 2] = i;
+ sel[i * 2 + 1] = i + nelt;
+ }
+ perm_mask_high = vect_gen_perm_mask (vectype, sel);
+ gcc_assert (perm_mask_high != NULL);
+
+ for (i = 0; i < nelt; i++)
+ sel[i] += nelt / 2;
+ perm_mask_low = vect_gen_perm_mask (vectype, sel);
+ gcc_assert (perm_mask_low != NULL);
+
+ for (i = 0, n = log_length; i < n; i++)
+ {
+ for (j = 0; j < length/2; j++)
+ {
+ vect1 = dr_chain[j];
+ vect2 = dr_chain[j+length/2];
+
+ /* Create interleaving stmt:
+ high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
+ ...}> */
+ high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
+ perm_stmt
+ = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
+ vect1, vect2, perm_mask_high);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ (*result_chain)[2*j] = high;
+
+ /* Create interleaving stmt:
+ low = VEC_PERM_EXPR <vect1, vect2,
+ {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
+ ...}> */
+ low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
+ perm_stmt
+ = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
+ vect1, vect2, perm_mask_low);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ (*result_chain)[2*j+1] = low;
+ }
+ memcpy (dr_chain.address (), result_chain->address (),
+ length * sizeof (tree));
+ }
}
}
@@ -5475,6 +5600,7 @@ vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
get chain for loads group using vect_shift_permute_load_chain. */
mode = TYPE_MODE (STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)));
if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
+ || exact_log2 (size) != -1
|| !vect_shift_permute_load_chain (dr_chain, size, stmt,
gsi, &result_chain))
vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);