diff options
author | Alexander Ivchenko <alexander.ivchenko@intel.com> | 2014-07-11 15:24:10 +0400 |
---|---|---|
committer | Alexander Ivchenko <alexander.ivchenko@intel.com> | 2014-08-06 16:24:16 +0400 |
commit | 55f9fbb03d0413cb8fe74e5ec5d6c2dd4280933e (patch) | |
tree | a276531909449c8ed589df86ad3cfdd3048b7400 /gcc-4.9/gcc/config/i386/i386.c | |
parent | 38a8aecfb882072900434499696b5c32a2274515 (diff) | |
download | toolchain_gcc-55f9fbb03d0413cb8fe74e5ec5d6c2dd4280933e.tar.gz toolchain_gcc-55f9fbb03d0413cb8fe74e5ec5d6c2dd4280933e.tar.bz2 toolchain_gcc-55f9fbb03d0413cb8fe74e5ec5d6c2dd4280933e.zip |
[4.8, 4.9] Backport of additional SLM tuning.
Six patches from trunk, reg-tested via 'make check':
2014-05-07 Evgeny Stupachenko <evstupac@gmail.com>
* tree-vect-data-refs.c (vect_grouped_load_supported): New
check for loads group of length 3.
(vect_permute_load_chain): New permutations for loads group of
length 3.
* tree-vect-stmts.c (vect_model_load_cost): Change cost
of vec_perm_shuffle for the new permutations.
2014-04-17 Evgeny Stupachenko <evstupac@gmail.com>
* config/i386/i386.c (x86_add_stmt_cost): Fix vector cost model for
Silvermont.
2014-04-17 Evgeny Stupachenko <evstupac@gmail.com>
* config/i386/x86-tune.def (TARGET_SLOW_PSHUFB): New tune definition.
* config/i386/i386.h (TARGET_SLOW_PSHUFB): New tune flag.
* config/i386/i386.c (expand_vec_perm_even_odd_1): Avoid byte shuffles
for TARGET_SLOW_PSHUFB.
2014-04-17 Evgeny Stupachenko <evstupac@gmail.com>
* config/i386/i386.c (slm_cost): Adjust vec_to_scalar_cost.
* config/i386/i386.c (intel_cost): Ditto.
2014-06-18 Evgeny Stupachenko <evstupac@gmail.com>
* config/i386/i386.c (ix86_reassociation_width): Add alternative for
vector case.
* config/i386/i386.h (TARGET_VECTOR_PARALLEL_EXECUTION): New.
* config/i386/x86-tune.def (X86_TUNE_VECTOR_PARALLEL_EXECUTION): New.
* tree-vect-data-refs.c (vect_shift_permute_load_chain): New.
Introduces an alternative way of permuting load groups.
(vect_transform_grouped_load): Try alternative way of permutations.
2014-06-05 Evgeny Stupachenko <evstupac@gmail.com>
* config/i386/sse.md (*ssse3_palignr<mode>_perm): New.
* config/i386/predicates.md (palignr_operand): New.
Indicates if permutation is suitable for palignr instruction.
Change-Id: I5e505735ce3dc0ec3c2a1151713a119b24d712fe
Signed-off-by: Alexander Ivchenko <alexander.ivchenko@intel.com>
Diffstat (limited to 'gcc-4.9/gcc/config/i386/i386.c')
-rw-r--r-- | gcc-4.9/gcc/config/i386/i386.c | 32 |
1 files changed, 27 insertions, 5 deletions
diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c index df504335e..4016b6052 100644 --- a/gcc-4.9/gcc/config/i386/i386.c +++ b/gcc-4.9/gcc/config/i386/i386.c @@ -82,6 +82,7 @@ along with GCC; see the file COPYING3. If not see #include "context.h" #include "pass_manager.h" #include "target-globals.h" +#include "tree-vectorizer.h" static rtx legitimize_dllimport_symbol (rtx, bool); static rtx legitimize_pe_coff_extern_decl (rtx, bool); @@ -1739,7 +1740,7 @@ struct processor_costs slm_cost = { 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ + 4, /* vec_to_scalar_cost. */ 1, /* scalar_to_vec_cost. */ 1, /* vec_align_load_cost. */ 2, /* vec_unalign_load_cost. */ @@ -1816,7 +1817,7 @@ struct processor_costs intel_cost = { 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ + 4, /* vec_to_scalar_cost. */ 1, /* scalar_to_vec_cost. */ 1, /* vec_align_load_cost. */ 2, /* vec_unalign_load_cost. */ @@ -44300,7 +44301,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) gcc_unreachable (); case V8HImode: - if (TARGET_SSSE3) + if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB) return expand_vec_perm_pshufb2 (d); else { @@ -44323,7 +44324,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) break; case V16QImode: - if (TARGET_SSSE3) + if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB) return expand_vec_perm_pshufb2 (d); else { @@ -46493,6 +46494,16 @@ ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, { int res = 1; + /* Vector part. */ + if (VECTOR_MODE_P (mode)) + { + if (TARGET_VECTOR_PARALLEL_EXECUTION) + return 2; + else + return 1; + } + + /* Scalar part. 
*/ if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL) res = 2; else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL) @@ -46592,7 +46603,6 @@ ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, { unsigned *cost = (unsigned *) data; unsigned retval = 0; - tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); @@ -46603,6 +46613,18 @@ ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, count *= 50; /* FIXME. */ retval = (unsigned) (count * stmt_cost); + + /* We need to multiply all vector stmt cost by 1.7 (estimated cost) + for Silvermont as it has out of order integer pipeline and can execute + 2 scalar instruction per tick, but has in order SIMD pipeline. */ + if (TARGET_SILVERMONT || TARGET_INTEL) + if (stmt_info && stmt_info->stmt) + { + tree lhs_op = gimple_get_lhs (stmt_info->stmt); + if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE) + retval = (retval * 17) / 10; + } + cost[where] += retval; return retval; |