diff options
author | Alexander Ivchenko <alexander.ivchenko@intel.com> | 2015-03-16 10:30:57 +0300 |
---|---|---|
committer | Alexander Ivchenko <alexander.ivchenko@intel.com> | 2015-03-17 13:03:08 +0300 |
commit | 3951a3654b8197466bee3e6732b3bc94e4018f68 (patch) | |
tree | 5f71295bf4a3df8c9d8187ae983591466ff82a86 /gcc-4.9/gcc/tree-vect-data-refs.c | |
parent | 8075018d7ad15059179e6ff7d0dd12071e1749b9 (diff) | |
download | toolchain_gcc-3951a3654b8197466bee3e6732b3bc94e4018f68.tar.gz toolchain_gcc-3951a3654b8197466bee3e6732b3bc94e4018f68.tar.bz2 toolchain_gcc-3951a3654b8197466bee3e6732b3bc94e4018f68.zip |
[4.9] Several improvements in code generation for x86. Backport from trunk.
2014-11-21 Evgeny Stupachenko <evstupac@gmail.com>
PR target/60451
* config/i386/i386.c (expand_vec_perm_even_odd_pack): New.
(expand_vec_perm_even_odd_1): Add new expand for V8HI mode,
replace for V16QI, V16HI and V32QI modes.
(ix86_expand_vec_perm_const_1): Add new expand.
2014-06-11 Evgeny Stupachenko <evstupac@gmail.com>
* tree-vect-data-refs.c (vect_grouped_store_supported): New
check for stores group of length 3.
(vect_permute_store_chain): New permutations for stores group of
length 3.
* tree-vect-stmts.c (vect_model_store_cost): Change cost
of vec_perm_shuffle for the new permutations.
2014-11-28 Evgeny Stupachenko <evstupac@gmail.com>
* tree-vect-data-refs.c (vect_transform_grouped_load): Limit shift
permutations to loads group of size 3.
2014-12-18 Bin Cheng <bin.cheng@arm.com>
PR tree-optimization/62178
* tree-ssa-loop-ivopts.c (cheaper_cost_with_cand): New function.
(iv_ca_replace): New function.
(try_improve_iv_set): New parameter try_replace_p.
Break local optimal fixed-point by calling iv_ca_replace.
(find_optimal_iv_set_1): Pass new argument to try_improve_iv_set.
Change-Id: I5dca8236d3807cedc5e09d7eda65f0ccec9f5cb2
Signed-off-by: Alexander Ivchenko <alexander.ivchenko@intel.com>
Diffstat (limited to 'gcc-4.9/gcc/tree-vect-data-refs.c')
-rw-r--r-- | gcc-4.9/gcc/tree-vect-data-refs.c | 222 |
1 files changed, 174 insertions, 48 deletions
diff --git a/gcc-4.9/gcc/tree-vect-data-refs.c b/gcc-4.9/gcc/tree-vect-data-refs.c index 853b89a4a..7af32d1ff 100644 --- a/gcc-4.9/gcc/tree-vect-data-refs.c +++ b/gcc-4.9/gcc/tree-vect-data-refs.c @@ -4391,13 +4391,14 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) { enum machine_mode mode = TYPE_MODE (vectype); - /* vect_permute_store_chain requires the group size to be a power of two. */ - if (exact_log2 (count) == -1) + /* vect_permute_store_chain requires the group size to be equal to 3 or + be a power of two. */ + if (count != 3 && exact_log2 (count) == -1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "the size of the group of accesses" - " is not a power of 2\n"); + "the size of the group of accesses" + " is not a power of 2 or not eqaul to 3\n"); return false; } @@ -4406,23 +4407,76 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) { unsigned int i, nelt = GET_MODE_NUNITS (mode); unsigned char *sel = XALLOCAVEC (unsigned char, nelt); - for (i = 0; i < nelt / 2; i++) + + if (count == 3) { - sel[i * 2] = i; - sel[i * 2 + 1] = i + nelt; + unsigned int j0 = 0, j1 = 0, j2 = 0; + unsigned int i, j; + + for (j = 0; j < 3; j++) + { + int nelt0 = ((3 - j) * nelt) % 3; + int nelt1 = ((3 - j) * nelt + 1) % 3; + int nelt2 = ((3 - j) * nelt + 2) % 3; + for (i = 0; i < nelt; i++) + { + if (3 * i + nelt0 < nelt) + sel[3 * i + nelt0] = j0++; + if (3 * i + nelt1 < nelt) + sel[3 * i + nelt1] = nelt + j1++; + if (3 * i + nelt2 < nelt) + sel[3 * i + nelt2] = 0; + } + if (!can_vec_perm_p (mode, false, sel)) + { + if (dump_enabled_p ()) + dump_printf (MSG_MISSED_OPTIMIZATION, + "permutaion op not supported by target.\n"); + return false; + } + + for (i = 0; i < nelt; i++) + { + if (3 * i + nelt0 < nelt) + sel[3 * i + nelt0] = 3 * i + nelt0; + if (3 * i + nelt1 < nelt) + sel[3 * i + nelt1] = 3 * i + nelt1; + if (3 * i + nelt2 < nelt) + sel[3 * i + nelt2] = nelt + j2++; + } + if (!can_vec_perm_p (mode, false, sel)) + { + if (dump_enabled_p ()) + dump_printf (MSG_MISSED_OPTIMIZATION, + "permutaion op not supported by target.\n"); + return false; + } + } + return true; } - if (can_vec_perm_p (mode, false, sel)) + else { - for (i = 0; i < nelt; i++) - sel[i] += nelt / 2; - if (can_vec_perm_p (mode, false, sel)) - return true; + /* If length is not equal to 3 then only power of 2 is supported. */ + gcc_assert (exact_log2 (count) != -1); + + for (i = 0; i < nelt / 2; i++) + { + sel[i * 2] = i; + sel[i * 2 + 1] = i + nelt; + } + if (can_vec_perm_p (mode, false, sel)) + { + for (i = 0; i < nelt; i++) + sel[i] += nelt / 2; + if (can_vec_perm_p (mode, false, sel)) + return true; + } } } if (dump_enabled_p ()) dump_printf (MSG_MISSED_OPTIMIZATION, - "interleave op not supported by target.\n"); + "permutaion op not supported by target.\n"); return false; } @@ -4442,9 +4496,9 @@ vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count) /* Function vect_permute_store_chain. Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be - a power of 2, generate interleave_high/low stmts to reorder the data - correctly for the stores. Return the final references for stores in - RESULT_CHAIN. + a power of 2 or equal to 3, generate interleave_high/low stmts to reorder + the data correctly for the stores. Return the final references for stores + in RESULT_CHAIN. E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8. The input is 4 vectors each containing 8 elements. We assign a number to @@ -4511,7 +4565,9 @@ vect_permute_store_chain (vec<tree> dr_chain, gimple perm_stmt; tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); tree perm_mask_low, perm_mask_high; - unsigned int i, n; + tree data_ref; + tree perm3_mask_low, perm3_mask_high; + unsigned int i, n, log_length = exact_log2 (length); unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype); unsigned char *sel = XALLOCAVEC (unsigned char, nelt); @@ -4519,47 +4575,116 @@ vect_permute_store_chain (vec<tree> dr_chain, memcpy (result_chain->address (), dr_chain.address (), length * sizeof (tree)); - for (i = 0, n = nelt / 2; i < n; i++) + if (length == 3) { - sel[i * 2] = i; - sel[i * 2 + 1] = i + nelt; - } - perm_mask_high = vect_gen_perm_mask (vectype, sel); - gcc_assert (perm_mask_high != NULL); + unsigned int j0 = 0, j1 = 0, j2 = 0; - for (i = 0; i < nelt; i++) - sel[i] += nelt / 2; - perm_mask_low = vect_gen_perm_mask (vectype, sel); - gcc_assert (perm_mask_low != NULL); + for (j = 0; j < 3; j++) + { + int nelt0 = ((3 - j) * nelt) % 3; + int nelt1 = ((3 - j) * nelt + 1) % 3; + int nelt2 = ((3 - j) * nelt + 2) % 3; - for (i = 0, n = exact_log2 (length); i < n; i++) - { - for (j = 0; j < length/2; j++) - { - vect1 = dr_chain[j]; - vect2 = dr_chain[j+length/2]; + for (i = 0; i < nelt; i++) + { + if (3 * i + nelt0 < nelt) + sel[3 * i + nelt0] = j0++; + if (3 * i + nelt1 < nelt) + sel[3 * i + nelt1] = nelt + j1++; + if (3 * i + nelt2 < nelt) + sel[3 * i + nelt2] = 0; + } + perm3_mask_low = vect_gen_perm_mask (vectype, sel); + gcc_assert (perm3_mask_low != NULL); + + for (i = 0; i < nelt; i++) + { + if (3 * i + nelt0 < nelt) + sel[3 * i + nelt0] = 3 * i + nelt0; + if (3 * i + nelt1 < nelt) + sel[3 * i + nelt1] = 3 * i + nelt1; + if (3 * i + nelt2 < nelt) + sel[3 * i + nelt2] = nelt + j2++; + } + perm3_mask_high = vect_gen_perm_mask (vectype, sel); + gcc_assert (perm3_mask_high != NULL); + + vect1 = dr_chain[0]; + vect2 = dr_chain[1]; /* Create interleaving stmt: - high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */ - high = make_temp_ssa_name (vectype, NULL, "vect_inter_high"); - perm_stmt - = gimple_build_assign_with_ops (VEC_PERM_EXPR, high, - vect1, vect2, perm_mask_high); + low = VEC_PERM_EXPR <vect1, vect2, + {j, nelt, *, j + 1, nelt + j + 1, *, + j + 2, nelt + j + 2, *, ...}> */ + data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low"); + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, + vect1, vect2, + perm3_mask_low); vect_finish_stmt_generation (stmt, perm_stmt, gsi); - (*result_chain)[2*j] = high; + vect1 = data_ref; + vect2 = dr_chain[2]; /* Create interleaving stmt: - low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1, - nelt*3/2+1, ...}> */ - low = make_temp_ssa_name (vectype, NULL, "vect_inter_low"); - perm_stmt - = gimple_build_assign_with_ops (VEC_PERM_EXPR, low, - vect1, vect2, perm_mask_low); + low = VEC_PERM_EXPR <vect1, vect2, + {0, 1, nelt + j, 3, 4, nelt + j + 1, + 6, 7, nelt + j + 2, ...}> */ + data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high"); + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, + vect1, vect2, + perm3_mask_high); vect_finish_stmt_generation (stmt, perm_stmt, gsi); - (*result_chain)[2*j+1] = low; + (*result_chain)[j] = data_ref; } - memcpy (dr_chain.address (), result_chain->address (), - length * sizeof (tree)); + } + else + { + /* If length is not equal to 3 then only power of 2 is supported. */ + gcc_assert (exact_log2 (length) != -1); + + for (i = 0, n = nelt / 2; i < n; i++) + { + sel[i * 2] = i; + sel[i * 2 + 1] = i + nelt; + } + perm_mask_high = vect_gen_perm_mask (vectype, sel); + gcc_assert (perm_mask_high != NULL); + + for (i = 0; i < nelt; i++) + sel[i] += nelt / 2; + perm_mask_low = vect_gen_perm_mask (vectype, sel); + gcc_assert (perm_mask_low != NULL); + + for (i = 0, n = log_length; i < n; i++) + { + for (j = 0; j < length/2; j++) + { + vect1 = dr_chain[j]; + vect2 = dr_chain[j+length/2]; + + /* Create interleaving stmt: + high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, + ...}> */ + high = make_temp_ssa_name (vectype, NULL, "vect_inter_high"); + perm_stmt + = gimple_build_assign_with_ops (VEC_PERM_EXPR, high, + vect1, vect2, perm_mask_high); + vect_finish_stmt_generation (stmt, perm_stmt, gsi); + (*result_chain)[2*j] = high; + + /* Create interleaving stmt: + low = VEC_PERM_EXPR <vect1, vect2, + {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1, + ...}> */ + low = make_temp_ssa_name (vectype, NULL, "vect_inter_low"); + perm_stmt + = gimple_build_assign_with_ops (VEC_PERM_EXPR, low, + vect1, vect2, perm_mask_low); + vect_finish_stmt_generation (stmt, perm_stmt, gsi); + (*result_chain)[2*j+1] = low; + } + memcpy (dr_chain.address (), result_chain->address (), + length * sizeof (tree)); + } } } @@ -5475,6 +5600,7 @@ vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size, get chain for loads group using vect_shift_permute_load_chain. */ mode = TYPE_MODE (STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt))); if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1 + || exact_log2 (size) != -1 || !vect_shift_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain)) vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain); |